Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.h | 42
-rw-r--r--  lib/Target/AArch64/AArch64.td | 70
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 347
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.h | 80
-rw-r--r--  lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600
-rw-r--r--  lib/Target/AArch64/AArch64CallingConv.td | 196
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 633
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.h | 108
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 415
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 2975
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 247
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 961
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 822
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 112
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 5099
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 140
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.cpp (renamed from lib/Target/CellSPU/SPUSelectionDAGInfo.cpp) | 15
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.h | 149
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 171
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h | 76
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 203
-rw-r--r--  lib/Target/AArch64/AArch64Schedule.td (renamed from lib/Target/CellSPU/SPUMachineFunction.cpp) | 8
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 25
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.h (renamed from lib/Target/CellSPU/SPUSelectionDAGInfo.h) | 17
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 43
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 54
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 81
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 69
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 24
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 31
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2197
-rw-r--r--  lib/Target/AArch64/AsmParser/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/AsmParser/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/AArch64/CMakeLists.txt | 36
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 803
-rw-r--r--  lib/Target/AArch64/Disassembler/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/Disassembler/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 408
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 172
-rw-r--r--  lib/Target/AArch64/InstPrinter/CMakeLists.txt | 8
-rw-r--r--  lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/InstPrinter/Makefile | 15
-rw-r--r--  lib/Target/AArch64/LLVMBuild.txt | 36
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 585
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 292
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 160
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 113
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 41
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h (renamed from lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h) | 21
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 502
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 178
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 167
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 194
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 65
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 13
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/Makefile (renamed from lib/Target/CellSPU/MCTargetDesc/Makefile) | 4
-rw-r--r--  lib/Target/AArch64/Makefile | 30
-rw-r--r--  lib/Target/AArch64/README.txt | 2
-rw-r--r--  lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 24
-rw-r--r--  lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/TargetInfo/LLVMBuild.txt (renamed from lib/Target/CellSPU/TargetInfo/LLVMBuild.txt) | 9
-rw-r--r--  lib/Target/AArch64/TargetInfo/Makefile (renamed from lib/Target/CellSPU/TargetInfo/Makefile) | 4
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 1103
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 1068
-rw-r--r--  lib/Target/AArch64/Utils/CMakeLists.txt | 5
-rw-r--r--  lib/Target/AArch64/Utils/LLVMBuild.txt (renamed from lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt) | 10
-rw-r--r--  lib/Target/AArch64/Utils/Makefile | 15
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 704
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARM.td | 35
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 701
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 10
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 71
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 8
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 431
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 23
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 2
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 26
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 29
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 6
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 297
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 126
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 5
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 445
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 699
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 38
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 169
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 24
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 26
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 200
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 71
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 56
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 61
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 126
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 34
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 32
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 38
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 5
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 458
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 138
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 645
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 3
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 42
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 59
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1
-rwxr-xr-x  lib/Target/ARM/LICENSE.TXT | 47
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 153
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 23
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 418
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 23
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 20
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h | 112
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 8
-rw-r--r--  lib/Target/ARM/Makefile | 2
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 2
-rw-r--r--  lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 48
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.h | 4
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 91
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 8
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 383
-rw-r--r--  lib/Target/CMakeLists.txt | 3
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 30
-rw-r--r--  lib/Target/CellSPU/CellSDKIntrinsics.td | 449
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 6
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 43
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 94
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 38
-rw-r--r--  lib/Target/CellSPU/Makefile | 20
-rw-r--r--  lib/Target/CellSPU/README.txt | 106
-rw-r--r--  lib/Target/CellSPU/SPU.h | 31
-rw-r--r--  lib/Target/CellSPU/SPU.td | 66
-rw-r--r--  lib/Target/CellSPU/SPU128InstrInfo.td | 41
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td | 408
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp | 333
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 53
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp | 256
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.h | 80
-rw-r--r--  lib/Target/CellSPU/SPUHazardRecognizers.cpp | 135
-rw-r--r--  lib/Target/CellSPU/SPUHazardRecognizers.h | 37
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1192
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 3266
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 178
-rw-r--r--  lib/Target/CellSPU/SPUInstrBuilder.h | 43
-rw-r--r--  lib/Target/CellSPU/SPUInstrFormats.td | 320
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 449
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 84
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 4484
-rw-r--r--  lib/Target/CellSPU/SPUMachineFunction.h | 50
-rw-r--r--  lib/Target/CellSPU/SPUMathInstr.td | 97
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td | 159
-rw-r--r--  lib/Target/CellSPU/SPUNopFiller.cpp | 153
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td | 664
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 357
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 106
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.td | 183
-rw-r--r--  lib/Target/CellSPU/SPURegisterNames.h | 19
-rw-r--r--  lib/Target/CellSPU/SPUSchedule.td | 59
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp | 65
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.h | 97
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 94
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 96
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 111
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 2
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 5
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 8
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 101
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 36
-rw-r--r--  lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 183
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 32
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 1673
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 176
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 105
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 17
-rw-r--r--  lib/Target/Hexagon/HexagonImmediates.td | 508
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 445
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 65
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 1280
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 27
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 2336
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 6048
-rw-r--r--  lib/Target/Hexagon/HexagonMCInst.h | 41
-rw-r--r--  lib/Target/Hexagon/HexagonMCInstLower.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 19
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 8
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 36
-rw-r--r--  lib/Target/Hexagon/HexagonOperands.td | 858
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 8
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 159
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 14
-rw-r--r--  lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 9
-rw-r--r--  lib/Target/Hexagon/HexagonSchedule.td | 24
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV4.td | 14
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 13
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 60
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 19
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 598
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 42
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h | 17
-rw-r--r--  lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 141
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 175
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 100
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp | 2
-rw-r--r--  lib/Target/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/MBlaze/AsmParser/CMakeLists.txt | 1
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 115
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 111
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 1
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 9
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h | 5
-rw-r--r--  lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 26
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.cpp | 47
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 14
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 48
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 14
-rw-r--r--  lib/Target/MBlaze/MBlazeMCInstLower.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MBlazeMachineFunction.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 86
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 10
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 20
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 6
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/MBlaze/Makefile | 3
-rw-r--r--  lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp | 4
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430CallingConv.td | 3
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 76
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 127
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 4
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 85
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 7
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 17
-rw-r--r--  lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp | 2
-rw-r--r--  lib/Target/Mangler.cpp | 10
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 844
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Mips/Disassembler/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Mips/Disassembler/Makefile | 2
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 50
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 10
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 93
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 22
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 89
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 43
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 29
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp | 80
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsReginfo.h | 31
-rw-r--r--  lib/Target/Mips/Makefile | 2
-rw-r--r--  lib/Target/Mips/Mips.h | 1
-rw-r--r--  lib/Target/Mips/Mips.td | 16
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 71
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.h | 6
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.h | 51
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 689
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.h | 80
-rw-r--r--  lib/Target/Mips/Mips16InstrFormats.td | 111
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 264
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 66
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 443
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 86
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 20
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 370
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 77
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 3
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 18
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 180
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 181
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 85
-rw-r--r--  lib/Target/Mips/MipsDSPInstrFormats.td | 5
-rw-r--r--  lib/Target/Mips/MipsDSPInstrInfo.td | 280
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 778
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 7
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 690
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 93
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 1710
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 207
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 514
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 552
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 170
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 45
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 1161
-rw-r--r--  lib/Target/Mips/MipsJITInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 54
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 18
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 61
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 11
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 140
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 258
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 7
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 473
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 57
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 442
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 62
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 87
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 26
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 59
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 9
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 38
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 31
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 22
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.h | 2
-rw-r--r--  lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 2
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 38
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 12
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 11
-rw-r--r--  lib/Target/NVPTX/ManagedStringPool.h | 1
-rw-r--r--  lib/Target/NVPTX/NVPTX.h | 34
-rw-r--r--  lib/Target/NVPTX/NVPTX.td | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 22
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 998
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 119
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.cpp | 54
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.h | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1642
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 17
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 1260
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 47
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp | 143
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h | 31
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.td | 96
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td | 145
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 71
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXNumRegisters.h | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 264
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.h | 31
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.td | 44
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 20
-rw-r--r--  lib/Target/NVPTX/NVPTXSplitBBatBar.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp | 22
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h | 14
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 85
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 59
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h | 77
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.cpp | 118
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.h | 18
-rw-r--r--  lib/Target/NVPTX/NVPTXutil.cpp | 32
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 177
-rw-r--r--  lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp | 6
-rw-r--r--  lib/Target/NVPTX/VectorElementize.cpp | 1248
-rw-r--r--  lib/Target/NVPTX/cl_common_defines.h | 123
-rw-r--r--  lib/Target/NVPTX/gen-register-defs.py | 202
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 1
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 30
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 56
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h | 70
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 124
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 21
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 10
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 75
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 9
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 7
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 8
-rw-r--r--  lib/Target/PowerPC/PPC.h | 26
-rw-r--r--  lib/Target/PowerPC/PPC.td | 149
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 525
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 103
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 68
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 250
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 21
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 333
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 1260
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 191
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 680
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 635
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 22
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 268
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 932
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 19
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 31
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 414
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 48
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 176
-rw-r--r--  lib/Target/PowerPC/PPCScheduleA2.td | 15
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG5.td | 15
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 23
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 30
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 18
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 20
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 240
-rw-r--r--  lib/Target/PowerPC/README.txt | 21
-rw-r--r--  lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPU.h | 51
-rw-r--r--  lib/Target/R600/AMDGPU.td | 41
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 145
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.h | 44
-rw-r--r--  lib/Target/R600/AMDGPUCallingConv.td | 42
-rw-r--r--  lib/Target/R600/AMDGPUConvertToISA.cpp | 62
-rw-r--r--  lib/Target/R600/AMDGPUFrameLowering.cpp | 122
-rw-r--r--  lib/Target/R600/AMDGPUFrameLowering.h | 44
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 414
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.h | 140
-rw-r--r--  lib/Target/R600/AMDGPUIndirectAddressing.cpp | 343
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 267
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 206
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.td | 82
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 266
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 60
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.cpp | 83
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.h | 34
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.cpp | 22
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.h | 29
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 75
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 66
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.td | 25
-rw-r--r--  lib/Target/R600/AMDGPUStructurizeCFG.cpp | 896
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 87
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 65
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 164
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h | 70
-rw-r--r--  lib/Target/R600/AMDIL.h | 121
-rw-r--r--  lib/Target/R600/AMDIL7XXDevice.cpp | 115
-rw-r--r--  lib/Target/R600/AMDIL7XXDevice.h | 72
-rw-r--r--  lib/Target/R600/AMDILBase.td | 85
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 3051
-rw-r--r--  lib/Target/R600/AMDILDevice.cpp | 132
-rw-r--r--  lib/Target/R600/AMDILDevice.h | 117
-rw-r--r--  lib/Target/R600/AMDILDeviceInfo.cpp | 94
-rw-r--r--  lib/Target/R600/AMDILDeviceInfo.h | 88
-rw-r--r--  lib/Target/R600/AMDILDevices.h | 19
-rw-r--r--  lib/Target/R600/AMDILEvergreenDevice.cpp | 169
-rw-r--r--  lib/Target/R600/AMDILEvergreenDevice.h | 93
-rw-r--r--  lib/Target/R600/AMDILISelDAGToDAG.cpp | 643
-rw-r--r--  lib/Target/R600/AMDILISelLowering.cpp | 647
-rw-r--r--  lib/Target/R600/AMDILInstrInfo.td | 207
-rw-r--r--  lib/Target/R600/AMDILIntrinsicInfo.cpp | 79
-rw-r--r--  lib/Target/R600/AMDILIntrinsicInfo.h | 49
-rw-r--r--  lib/Target/R600/AMDILIntrinsics.td | 232
-rw-r--r--  lib/Target/R600/AMDILNIDevice.cpp | 65
-rw-r--r--  lib/Target/R600/AMDILNIDevice.h | 57
-rw-r--r--  lib/Target/R600/AMDILPeepholeOptimizer.cpp | 1215
-rw-r--r--  lib/Target/R600/AMDILRegisterInfo.td | 107
-rw-r--r--  lib/Target/R600/AMDILSIDevice.cpp | 48
-rw-r--r--  lib/Target/R600/AMDILSIDevice.h | 39
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 59
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 172
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 54
-rw-r--r--  lib/Target/R600/InstPrinter/CMakeLists.txt | 7
-rw-r--r--  lib/Target/R600/InstPrinter/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/R600/InstPrinter/Makefile | 15
-rw-r--r--  lib/Target/R600/LLVMBuild.txt (renamed from lib/Target/CellSPU/LLVMBuild.txt) | 14
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 90
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 83
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 30
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 40
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 113
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 55
-rw-r--r--  lib/Target/R600/MCTargetDesc/CMakeLists.txt | 10
-rw-r--r--  lib/Target/R600/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/R600/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 585
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 201
-rw-r--r--  lib/Target/R600/Makefile | 23
-rw-r--r--  lib/Target/R600/Processors.td | 30
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 268
-rw-r--r--  lib/Target/R600/R600Defines.h | 97
-rw-r--r--  lib/Target/R600/R600EmitClauseMarkers.cpp | 255
-rw-r--r--  lib/Target/R600/R600ExpandSpecialInstrs.cpp | 297
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 1106
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 74
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 841
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 204
-rw-r--r--  lib/Target/R600/R600Instructions.td | 2267
-rw-r--r--  lib/Target/R600/R600Intrinsics.td | 31
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.cpp | 18
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 32
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 427
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 120
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp | 99
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 55
-rw-r--r--  lib/Target/R600/R600RegisterInfo.td | 209
-rw-r--r--  lib/Target/R600/R600Schedule.td | 36
-rw-r--r--  lib/Target/R600/SIAnnotateControlFlow.cpp | 329
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 670
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 58
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 358
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 426
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 264
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 97
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 356
-rw-r--r--  lib/Target/R600/SIInstructions.td | 1607
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 42
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 501
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 18
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.h | 33
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 53
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 50
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 182
-rw-r--r--  lib/Target/R600/SISchedule.td | 15
-rw-r--r--  lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp | 26
-rw-r--r--  lib/Target/R600/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/R600/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/R600/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/README.txt | 15
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 6
-rw-r--r--  lib/Target/Sparc/FPMover.cpp | 6
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcCallingConv.td | 20
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.cpp | 22
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.h | 4
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 164
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 19
-rw-r--r--  lib/Target/Sparc/SparcInstr64Bit.td | 285
-rw-r--r--  lib/Target/Sparc/SparcInstrFormats.td | 37
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 199
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 46
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 11
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 14
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 17
-rw-r--r--  lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 2
-rw-r--r--  lib/Target/Target.cpp | 5
-rw-r--r--  lib/Target/TargetInstrInfo.cpp | 88
-rw-r--r--  lib/Target/TargetIntrinsicInfo.cpp | 2
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 283
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 40
-rw-r--r--  lib/Target/TargetMachine.cpp | 34
-rw-r--r--  lib/Target/TargetMachineC.cpp | 16
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 248
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 4
-rw-r--r--  lib/Target/TargetTransformImpl.cpp | 353
-rw-r--r--  lib/Target/X86/AsmParser/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmLexer.cpp | 159
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 1129
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 3
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 9
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.h | 4
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 347
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 12
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 135
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 34
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 27
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 14
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 45
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 11
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 8
-rw-r--r--  lib/Target/X86/Makefile | 3
-rw-r--r--  lib/Target/X86/README-SSE.txt | 9
-rw-r--r--  lib/Target/X86/README.txt | 37
-rw-r--r--  lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 18
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 2
-rw-r--r--  lib/Target/X86/X86.h | 11
-rw-r--r--  lib/Target/X86/X86.td | 101
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 73
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 8
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 2
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 102
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 36
-rw-r--r--  lib/Target/X86/X86CompilationCallback_Win64.asm | 4
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 169
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 12
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 342
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 6
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 38
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 3418
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 116
-rw-r--r--  lib/Target/X86/X86Instr3DNow.td | 15
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 577
-rw-r--r--  lib/Target/X86/X86InstrCMovSetCC.td | 21
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 331
-rw-r--r--  lib/Target/X86/X86InstrControl.td | 72
-rw-r--r--  lib/Target/X86/X86InstrExtension.td | 73
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 74
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 26
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 163
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 12
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 746
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 336
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 80
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 2554
-rw-r--r--  lib/Target/X86/X86InstrShiftRotate.td | 58
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 32
-rw-r--r--  lib/Target/X86/X86InstrTSX.td | 9
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 36
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 120
-rw-r--r--  lib/Target/X86/X86PadShortFunction.cpp | 212
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 296
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 7
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 126
-rw-r--r--  lib/Target/X86/X86SchedSandyBridge.td | 122
-rw-r--r--  lib/Target/X86/X86Schedule.td | 94
-rw-r--r--  lib/Target/X86/X86ScheduleAtom.td | 2
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 10
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 157
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 61
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 58
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 32
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 16
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 8
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 495
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 10
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 4
-rw-r--r--  lib/Target/XCore/Disassembler/CMakeLists.txt | 5
-rw-r--r--  lib/Target/XCore/Disassembler/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/XCore/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 800
-rw-r--r--  lib/Target/XCore/InstPrinter/CMakeLists.txt | 7
-rw-r--r--  lib/Target/XCore/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/XCore/InstPrinter/Makefile | 16
-rw-r--r--  lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp | 97
-rw-r--r--  lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 44
-rw-r--r--  lib/Target/XCore/LLVMBuild.txt | 3
-rw-r--r--  lib/Target/XCore/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 14
-rw-r--r--  lib/Target/XCore/Makefile | 6
-rw-r--r--  lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp | 2
-rw-r--r--  lib/Target/XCore/XCore.td | 6
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 80
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 71
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.h | 4
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 27
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 162
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrFormats.td | 269
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 8
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 1108
-rw-r--r--  lib/Target/XCore/XCoreMCInstLower.cpp | 117
-rw-r--r--  lib/Target/XCore/XCoreMCInstLower.h | 42
-rw-r--r--  lib/Target/XCore/XCoreMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 96
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 7
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.td | 11
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.h | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 15
-rw-r--r--  lib/Target/XCore/XCoreTargetObjectFile.cpp | 2
712 files changed, 87354 insertions, 43428 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
new file mode 100644
index 000000000000..4de4faa58182
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+FunctionPass *createAArch64BranchFixupPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AArch64AsmPrinter &AP);
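+
+// Illustrative usage (assumed, not part of this patch): mirroring other LLVM
+// backends, the target machine's pass configuration registers these entry
+// points with calls such as
+//   addPass(createAArch64BranchFixupPass());
+// during its pre-emit pass setup.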
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
new file mode 100644
index 000000000000..e17052b4a565
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,70 @@
+//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions">;
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors
+//
+
+include "AArch64Schedule.td"
+
+def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
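+
+// A further processor could be declared the same way; for instance
+// (illustrative only, not part of this patch):
+//   def : Processor<"example-cpu", GenericItineraries, [FeatureNEON]>;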
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+
+include "AArch64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+
+def A64InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyWriters = [A64InstPrinter];
+}
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
new file mode 100644
index 000000000000..47ebb826e0d0
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64AsmPrinter.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
+ // expected to be created.
+ assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+/// Try to print a floating-point register as if it belonged to a specified
+/// register-class. For example the inline asm operand modifier "b" requires its
+/// argument to be printed as "bN".
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ if (!MO.isReg())
+ return true;
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Implements the 'w' and 'x' inline asm operand modifiers, which print a
+/// GPR of the appropriate width and print an immediate 0 as either wzr or xzr.
+static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+
+ if (MO.isImm() && MO.getImm() == 0) {
+ O << Prefix << "zr";
+ return false;
+ } else if (MO.isReg()) {
+ if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
+ O << (Prefix == 'x' ? "sp" : "wsp");
+ return false;
+ }
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
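+
+// For instance (illustrative): under the 'w' modifier an immediate 0 prints
+// as "wzr" and the stack pointer prints as "wsp"; under 'x' they print as
+// "xzr" and "sp" respectively.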
+
+bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O) {
+ StringRef Name;
+ StringRef Modifier;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for symbolic address constraint");
+ case MachineOperand::MO_GlobalAddress:
+ Name = Mang->getSymbol(MO.getGlobal())->getName();
+
+ // Global variables may be accessed either via a GOT or in various fun and
+ // interesting TLS-model specific ways. Set the prefix modifier as
+ // appropriate here.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (GV->isThreadLocal()) {
+ switch (TM.getTLSModel(GV)) {
+ case TLSModel::GeneralDynamic:
+ Modifier = "tlsdesc";
+ break;
+ case TLSModel::LocalDynamic:
+ Modifier = "dtprel";
+ break;
+ case TLSModel::InitialExec:
+ Modifier = "gottprel";
+ break;
+ case TLSModel::LocalExec:
+ Modifier = "tprel";
+ break;
+ }
+ } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ Modifier = "got";
+ }
+ }
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Name = MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Name = GetCPISymbol(MO.getIndex())->getName();
+ break;
+ }
+
+ // Some instructions (notably ADRP) don't take the # prefix for
+ // immediates. Only print it if asked to.
+ if (PrintImmediatePrefix)
+ O << '#';
+
+ // The joining "_" is only needed if both the modifier and the suffix are
+ // non-empty. This little block simply takes care of the four possible
+ // combinations involved there.
+ if (Modifier == "" && Suffix == "")
+ O << Name;
+ else if (Modifier == "" && Suffix != "")
+ O << ":" << Suffix << ':' << Name;
+ else if (Modifier != "" && Suffix == "")
+ O << ":" << Modifier << ':' << Name;
+ else
+ O << ":" << Modifier << '_' << Suffix << ':' << Name;
+
+ return false;
+}
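+
+// Illustrative outputs (assumed, for a symbol named "var"): Modifier "got"
+// with Suffix "lo12" prints ":got_lo12:var"; Suffix "lo12" alone prints
+// ":lo12:var"; with neither set, just "var" is printed.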
+
+bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!ExtraCode || !ExtraCode[0]) {
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour that we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
+ }
+
+ // We have a real modifier to handle.
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'w':
+ // Output 32-bit general register operand, constant zero as wzr, or stack
+ // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O);
+ case 'x':
+ // Output 64-bit general register operand, constant zero as xzr, or stack
+ // pointer as sp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O);
+ case 'H':
+ // Output higher numbered of a 64-bit general register pair
+ case 'Q':
+ // Output least significant register of a 64-bit general register pair
+ case 'R':
+ // Output most significant register of a 64-bit general register pair
+
+ // FIXME note: these three operand modifiers will require, to some extent,
+ // adding a paired GPR64 register class. Initial investigation suggests that
+ // assertions are hit unless it has a type and is made legal for that type
+ // in ISelLowering. After that step is made, the number of modifications
+ // needed explodes (operation legality, calling conventions, stores, reg
+ // copies ...).
+ llvm_unreachable("FIXME: Unimplemented register pairs");
+ case 'b':
+ // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR8RegClass, O);
+ case 'h':
+ // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR16RegClass, O);
+ case 's':
+ // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR32RegClass, O);
+ case 'd':
+ // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR64RegClass, O);
+ case 'q':
+ // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR128RegClass, O);
+ case 'A':
+ // Output symbolic address with appropriate relocation modifier (also
+ // suitable for ADRP).
+ return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ case 'L':
+ // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
+ // modifier.
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ case 'G':
+ // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
+ // modifier (currently only for TLS local exec).
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ }
+
+}
+
+bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Currently both memory constraints (m and Q) behave the same and are printed
+ // as the address held in a single register. In future, we may allow "m" to
+ // provide both a base and an offset.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline assembly memory operand");
+ O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ return false;
+}
+
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // Cast away const; the DI* debug-info classes do not take const operands
+ // for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
+ OS << '+' << MI->getOperand(1).getImm();
+ OS << ']';
+ OS << "+" << MI->getOperand(NOps - 2).getImm();
+}
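+
+// Illustrative output (assumed): for a variable "i" stored at [x29 + 16],
+// this prints a comment along the lines of "// DEBUG_VALUE: i <- [x29+16]+0".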
+
+#include "AArch64GenMCPseudoLowering.inc"
+
+void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ switch (MI->getOpcode()) {
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmPrinter() {
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
+}
+
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
new file mode 100644
index 000000000000..af0c9fed066f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.h
@@ -0,0 +1,80 @@
+// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64ASMPRINTER_H
+#define LLVM_AARCH64ASMPRINTER_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ public:
+ explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ }
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const;
+
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitEndOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ /// printSymbolicAddress - Given some kind of reasonably bare symbolic
+ /// reference, print out the appropriate asm string to represent it. If
+ /// appropriate, a relocation-specifier will be produced, composed of a
+ /// general class derived from the MO parameter and an instruction-specific
+ /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
+ /// given.
+ bool printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual const char *getPassName() const {
+ return "AArch64 Assembly Printer";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
new file mode 100644
index 000000000000..71233ba5c3dc
--- /dev/null
+++ b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
@@ -0,0 +1,600 @@
+//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that fixes AArch64 branches which have ended up out
+// of range for their immediate operands.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-branch-fixup"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+
+/// Return the worst case padding that could result from unknown offset bits.
+/// This does not include alignment padding caused by known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
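+///
+/// For example, with LogAlign == 3 (8-byte alignment) and KnownBits == 2, up
+/// to (1u << 3) - (1u << 2) == 4 bytes of padding may be required.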
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// Due to limited PC-relative displacements, conditional branches to distant
+ /// blocks may need converting into an unconditional equivalent. For example:
+ /// tbz w1, #0, far_away
+ /// becomes
+ /// tbnz w1, #0, skip
+ /// b far_away
+ /// skip:
+ class AArch64BranchFixup : public MachineFunctionPass {
+ /// Information about the offset and size of a single basic block.
+ struct BasicBlockInfo {
+ /// Distance from the beginning of the function to the beginning of this
+ /// basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size of the basic block in bytes. If the block contains inline
+ /// assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// The number of low bits in Offset that are known to be exact. The
+ /// remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// When non-zero, the block contains instructions (inline asm) of unknown
+ /// size. The real size may be smaller than Size bytes by a multiple of 1
+ /// << Unalign.
+ uint8_t Unalign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
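+ /// For example (assuming Unalign == 0), Size == 12 with KnownBits == 4
+ /// gives Size & 15 != 0, so only CountTrailingZeros_32(12) == 2 low bits
+ /// are actually known.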
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ if (!LogAlign)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LogAlign, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(LogAlign, internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// One per immediate branch, keeping the machine instruction pointer, the
+ /// number of bits available for its displacement, and whether it is
+ /// conditional.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned OffsetBits : 31;
+ bool IsCond : 1;
+ ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
+ : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
+ };
+
+ /// Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ MachineFunction *MF;
+ const AArch64InstrInfo *TII;
+ public:
+ static char ID;
+ AArch64BranchFixup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AArch64 branch fixup pass";
+ }
+
+ private:
+ void initializeFunctionInfo();
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned OffsetBits);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify();
+ };
+ char AArch64BranchFixup::ID = 0;
+}
+
+/// check BBOffsets
+void AArch64BranchFixup::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64BranchFixup::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the branch fixup pass.
+FunctionPass *llvm::createAArch64BranchFixupPass() {
+ return new AArch64BranchFixup();
+}
+
+bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ DEBUG(dbgs() << "***** AArch64BranchFixup ******");
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block and location of each immediate branch.
+ initializeFunctionInfo();
+
+ // Iteratively fix up branches until there is no change.
+ unsigned NoBRIters = 0;
+ bool MadeChange = false;
+ while (true) {
+ DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Return true if the specified basic block can fallthrough into the block
+/// immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// Do the initial scan of the function, building up information about the sizes
+/// of each block and the location of each immediate branch.
+void AArch64BranchFixup::initializeFunctionInfo() {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool IsCond = false;
+
+ // The offsets encoded in these instructions are scaled by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
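+ // For example, TBZ's 14-bit field reaches +/-32KiB, Bcc/CBZ's 19-bit
+ // field +/-1MiB, and Bimm's 26-bit field +/-128MiB after scaling.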
+ unsigned Bits = 0;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case AArch64::TBZxii:
+ case AArch64::TBZwii:
+ case AArch64::TBNZxii:
+ case AArch64::TBNZwii:
+ IsCond = true;
+ Bits = 14 + 2;
+ break;
+ case AArch64::Bcc:
+ case AArch64::CBZx:
+ case AArch64::CBZw:
+ case AArch64::CBNZx:
+ case AArch64::CBNZw:
+ IsCond = true;
+ Bits = 19 + 2;
+ break;
+ case AArch64::Bimm:
+ Bits = 26 + 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
+ }
+ }
+ }
+}
+
+/// Compute the size and some alignment information for MBB. This function
+/// updates BBInfo directly.
+void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->getInstSizeInBytes(*I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = 2;
+ }
+}
+
+/// Return the current offset of the specified machine instruction from the
+/// start of the function. This offset changes as stuff is moved around inside
+/// the function.
+unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *
+AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// Returns true if the distance between specific MI and specific BB can fit in
+/// MI's displacement field.
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. This if statement contains all the special handling of different
+ // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) {
+ // The basic block is operand number 1 for Bcc
+ CondBrMBBOperand = 1;
+
+ A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
+ CC = A64InvertCondCode(CC);
+ MI->getOperand(0).setImm(CC);
+ } else {
+ MachineInstrBuilder InvertedMI;
+ int InvertedOpcode;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown branch type");
+ case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
+ case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
+ case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
+ case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
+ case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
+ case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
+ case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
+ case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
+ }
+
+ InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
+ for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
+ InvertedMI.addOperand(MI->getOperand(i));
+ if (MI->getOperand(i).isMBB())
+ CondBrMBBOperand = i;
+ }
+
+ MI->eraseFromParent();
+ MI = Br.MI = InvertedMI;
+ }
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through
+ // block. Otherwise, split the MBB before the next instruction.
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == AArch64::Bimm) {
+ // Last MI in the BB is an unconditional branch. We can swap destinations:
+ // b.eq L1 (temporarily b.ne L1 after first change)
+ // b L2
+ // =>
+ // b.ne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ MachineBasicBlock::iterator MBBI = MI; ++MBBI;
+ splitBlockBeforeInstr(MBBI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+
+ // After splitting and removing the unconditional branch from the original BB,
+ // the structure is now:
+ // oldbb:
+ // [things]
+ // b.invertedCC L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ //
+ // We now have to change the conditional branch to point to splitbb and add an
+ // unconditional branch after it to L1, giving the final structure:
+ // oldbb:
+ // [things]
+ // b.invertedCC splitbb
+ // b L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#"
+ << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new unconditional branch and fixup the destination of the
+ // conditional one. Also update the ImmBranch as well as adding a new entry
+ // for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
+ .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
+ MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
+
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+
+ // 26 bits written down in Bimm, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
+
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
new file mode 100644
index 000000000000..b880d8373deb
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -0,0 +1,196 @@
+//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the AArch64 architecture.
+//===----------------------------------------------------------------------===//
+
+
+// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
+// higher level of abstraction than LLVM's target interface presents. In
+// particular, it refers (like other ABIs, in fact) directly to
+// structs. However, generic LLVM code takes the liberty of lowering structure
+// arguments to the component fields before we see them.
+//
+// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
+// implemented, so the goals of this calling convention are, in decreasing
+// priority order:
+// 1. Expose *some* way to express the concepts required to implement the
+// generic PCS from a front-end.
+// 2. Provide a sane ABI for pure LLVM.
+// 3. Follow the generic PCS as closely as is naturally possible.
+//
+// The suggested front-end implementation of PCS features is:
+// * Integer, float and vector arguments of all sizes which end up in
+// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in
+// registers as similar integer or composite types. For example:
+// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
+// * HFAs in registers follow rules similar to small structs: appropriate
+// composite types.
+// * Structure arguments with size > 16 bytes are passed via a pointer,
+// handled completely by the front-end.
+// * Structure return values > 16 bytes via an sret pointer argument.
+// * Other stack-based arguments (not large structs) are passed using byval
+// pointers. Padding arguments are added beforehand to guarantee a large
+// struct doesn't later use integer registers.
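+//
+// For example, a front-end could lower "struct { long a; long b; }" passed by
+// value to a [2 x i64] argument under this scheme.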
+//
+// N.b. this means that it is the front-end's responsibility (if it cares about
+// PCS compliance) to check whether enough registers are available for an
+// argument when deciding how to pass it.
+
+class CCIfAlign<int Align, CCAction A>:
+ CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+
+def CC_A64_APCS : CallingConv<[
+ // SRet is an LLVM-specific concept, so it takes precedence over general ABI
+ // concerns. However, this rule will be used by C/C++ frontends to implement
+ // structure return.
+ CCIfSRet<CCAssignToReg<[X8]>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot are 64 bits.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Canonicalise the various types that live in different floating-point
+ // registers. This makes sense because the PCS does not distinguish Short
+ // Vectors and Floating-point types.
+ CCIfType<[v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCBitConvertToType<f128>>,
+
+ // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
+ // Floating-point or Short Vector Type and the NSRN is less than 8, then the
+ // argument is allocated to the least significant bits of register
+ // v[NSRN]. The NSRN is incremented by one. The argument has now been
+ // allocated."
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
+ // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
+ // argument is allocated to SIMD and Floating-point registers (with one
+ // register per element of the HFA). The NSRN is incremented by the number of
+ // registers used. The argument has now been allocated."
+ //
+ // N.b. As above, this rule is the responsibility of the front-end.
+
+ // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
+ // the argument is rounded up to the nearest multiple of 8 bytes."
+ //
+ // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
+ // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the Argument's type."
+ //
+ // It is expected that these will be satisfied by adding dummy arguments to
+ // the prototype.
+
+ // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
+ // type then the size of the argument is set to 8 bytes. The effect is as if
+ // the argument had been copied to the least significant bits of a 64-bit
+ // register and the remaining bits filled with unspecified values."
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
+ // precision Floating-point or Short Vector Type, then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+ // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
+ // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
+ // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
+ // one. The argument has now been allocated."
+
+ // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
+ // represented as two i64s, the first one being split. If we delayed this
+ // operation, C.8 would never be reached.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
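+ //
+ // For example (a hypothetical signature), f(i32, i128) would put the i32 in
+ // x0 and the i128 in x2/x3: assigning x2 shadows x1, keeping the pair even.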
+
+ // Note: the promotion also implements C.14.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // And now the real implementation of C.7
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+
+ // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
+ // up to the next even number."
+ //
+ // "C.9: If the argument is an Integral Type, the size of the argument is
+ // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+ // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
+ // memory representation of the argument. The NGRN is incremented by two. The
+ // argument has now been allocated."
+ //
+ // Subtlety here: what if alignment is 16 but it is not an integral type? All
+ // floating-point types have been allocated already, which leaves composite
+ // types: this is why a front-end may need to produce i128 for a struct <= 16
+ // bytes.
+
+ // PCS: "C.10 If the argument is a Composite Type and the size in double-words
+ // of the argument is not more than 8 minus NGRN, then the argument is copied
+ // into consecutive general-purpose registers, starting at x[NGRN]. The
+ // argument is passed as though it had been loaded into the registers from a
+ // double-word aligned address with an appropriate sequence of LDR
+ // instructions loading consecutive registers from memory (the contents of any
+ // unused parts of the registers are unspecified by this standard). The NGRN
+ // is incremented by the number of registers used. The argument has now been
+ // allocated."
+ //
+ // Another one that's the responsibility of the front-end (sigh).
+
+ // PCS: "C.11: The NGRN is set to 8."
+ CCCustom<"CC_AArch64NoMoreRegs">,
+
+ // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the argument's type."
+ //
+ // PCS: "C.13: If the argument is a composite type then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ //
+ // Note that the effect of this corresponds to a memcpy rather than register
+ // stores so that the struct ends up correctly addressable at the adjusted
+ // NSAA.
+
+ // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
+ // of the argument is set to 8 bytes. The effect is as if the argument was
+ // copied to the least significant bits of a 64-bit register and the remaining
+ // bits filled with unspecified values."
+ //
+ // Integer types were widened above. Floating-point and composite types have
+ // already been allocated completely. Nothing to do.
+
+ // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
+ // is incremented by the size of the argument. The argument has now been
+ // allocated."
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCAssignToStack<8, 8>>
+
+]>;
+
+// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
+// of vector registers (8-15) are callee-saved. The order here is picked up
+// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
+// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
+// [sp-16], ...
+def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
+ (sequence "D%u", 15, 8))>;
+
+
+// TLS descriptor calls are extremely restricted in their changes, to allow
+// optimisations in the (hopefully) more common fast path where no real action
+// is needed. They actually have to preserve all registers, except for the
+// unavoidable X30 and the return register X0.
+def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
+ (sequence "Q%u", 31, 0))>;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
new file mode 100644
index 000000000000..dc41f2f60525
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -0,0 +1,633 @@
+//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
+ uint64_t &Initial,
+ uint64_t &Residual) const {
+ // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
+ // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
+ // 0x1f8, but stack adjustment should always be a multiple of 16.
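+ // For example, Total == 0x210 splits into Initial == 0x1f0 and
+ // Residual == 0x20.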
+ if (Total <= 0x1f0) {
+ Initial = Total;
+ Residual = 0;
+ } else {
+ Initial = 0x1f0;
+ Residual = Total - Initial;
+ }
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool NeedsFrameMoves = MMI.hasDebugInfo()
+ || MF.getFunction()->needsUnwindTableEntry();
+
+ uint64_t NumInitialBytes, NumResidualBytes;
+
+ // Currently we expect the stack to be laid out by
+ // sub sp, sp, #initial
+ // stp x29, x30, [sp, #offset]
+ // ...
+ // str xxx, [sp, #offset]
+ // sub sp, sp, #rest (possibly via extra instructions).
+ if (MFI->getCalleeSavedInfo().size()) {
+ // If there are callee-saved registers, we want to store them efficiently as
+ // a block, and virtual base assignment happens too early to do it for us so
+ // we adjust the stack in two phases: first just for callee-saved fiddling,
+ // then to allocate the rest of the frame.
+ splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
+ } else {
+ // If there aren't any callee-saved registers, two-phase adjustment is
+ // inefficient. It's more efficient to adjust with NumInitialBytes too
+ // because when we're in a "callee pops argument space" situation, that pop
+ // must be tacked onto Initial for correctness.
+ NumInitialBytes = MFI->getStackSize();
+ NumResidualBytes = 0;
+ }
+
+ // Tell everyone else how much adjustment we're expecting them to use. In
+ // particular if an adjustment is required for a tail call the epilogue could
+ // have a different view of things.
+ FuncInfo->setInitialStackAdjust(NumInitialBytes);
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
+ MachineInstr::FrameSetup);
+
+ if (NeedsFrameMoves && NumInitialBytes) {
+ // We emit this update even if the CFA is set from a frame pointer later so
+ // that the CFA is valid in the interim.
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SPLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumInitialBytes);
+ Moves.push_back(MachineMove(SPLabel, Dst, Src));
+ }
+
+ // Otherwise we need to set the frame pointer and/or add a second stack
+ // adjustment.
+
+ bool FPNeedsSetting = hasFP(MF);
+ for (; MBBI != MBB.end(); ++MBBI) {
+ // Note that this search makes strong assumptions about the operation used
+ // to store the frame-pointer: it must be "STP x29, x30, ...". This could
+ // change in future, but until then there's no point in implementing
+ // untestable, more generic cases.
+ if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
+ && MBBI->getOperand(0).getReg() == AArch64::X29) {
+ int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
+ FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
+
+ ++MBBI;
+ emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
+ AArch64::X29,
+ NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
+ MachineInstr::FrameSetup);
+
+ // The offset adjustment used when emitting debugging locations relative
+ // to whatever frame base is set. AArch64 uses the default frame base (FP
+ // or SP) and this adjusts the calculations to be correct.
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
+ - MFI->getStackSize());
+
+ if (NeedsFrameMoves) {
+ MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPLabel);
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
+ Moves.push_back(MachineMove(FPLabel, Dst, Src));
+ }
+
+ FPNeedsSetting = false;
+ }
+
+ if (!MBBI->getFlag(MachineInstr::FrameSetup))
+ break;
+ }
+
+ assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
+ MachineInstr::FrameSetup);
+
+ // Now we emit the rest of the frame setup information, if necessary: we've
+ // already noted the FP and initial SP moves so we're left with the prologue's
+ // final SP update and callee-saved register locations.
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+ // And any callee-saved registers (it's fine to leave them to the end here,
+ // because the old values are still valid at this point).
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ MachineLocation Dst(MachineLocation::VirtualFP,
+ MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ // Initial and residual are named for consistency with the prologue. Note that
+ // in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments; this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ }
+
+ assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
+ && "refusing to adjust stack by misaligned amt");
+
+ // We may need to address callee-saved registers differently, so find out the
+ // bound on the frame indices.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The "residual" stack update comes first from this direction and guarantees
+ // that SP is NumInitialBytes below its value on function entry, either by a
+ // direct update or restoring it from the frame pointer.
+ if (NumInitialBytes + ArgumentPopSize != 0) {
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
+ NumInitialBytes + ArgumentPopSize);
+ --MBBI;
+ }
+
+
+ // MBBI now points to the instruction just past the last callee-saved
+ // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
+ // otherwise).
+
+ // Now we need to find out where to put the bulk of the stack adjustment
+ MachineBasicBlock::iterator FirstEpilogue = MBBI;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+
+ unsigned FrameOp;
+ for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
+ if (MBBI->getOperand(FrameOp).isFI())
+ break;
+ }
+
+ // If this instruction doesn't have a frame index we've reached the end of
+ // the callee-save restoration.
+ if (FrameOp == MBBI->getNumOperands())
+ break;
+
+ // Likewise if it *is* a local reference, but not to a callee-saved object.
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
+ if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
+ break;
+
+ FirstEpilogue = MBBI;
+ }
+
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ int64_t StaticFrameBase;
+ StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
+ emitRegUpdate(MBB, FirstEpilogue, DL, TII,
+ AArch64::XSP, AArch64::X29, AArch64::NoRegister,
+ StaticFrameBase);
+ } else {
+ emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
+ }
+}
+
+int64_t
+AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
+ int FrameIndex,
+ unsigned &FrameReg,
+ int SPAdj,
+ bool IsCalleeSaveOp) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
+
+ assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
+ && "callee-saved register in unexpected place");
+
+ // If the frame for this function is particularly large, we adjust the stack
+ // in two phases which means the callee-save related operations see a
+ // different (intermediate) stack size.
+ int64_t FrameRegPos;
+ if (IsCalleeSaveOp) {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
+ } else if (useFPForAddressing(MF)) {
+ // Have to use the frame pointer since we have no idea where SP is.
+ FrameReg = AArch64::X29;
+ FrameRegPos = FuncInfo->getFramePointerOffset();
+ } else {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ }
+
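+ // For example, addressing from SP below a 32-byte frame (FrameRegPos ==
+ // -32), an object at TopOfFrameOffset == -8 resolves to SP + 24.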
+ return TopOfFrameOffset - FrameRegPos;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+ // If addressing of local variables is going to be more complicated than
+ // shoving a base register and an offset into the instruction then we may well
+ // need to scavenge registers. We should either specifically add a
+ // callee-save register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != 0) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+ // Create a stack slot for scavenging purposes. PrologEpilogInserter
+ // helpfully places it near either SP or FP for us to avoid
+ // infinite regression during scavenging.
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+ unsigned Reg) const {
+ // If @llvm.returnaddress is called then it will refer to X30 by some means;
+ // the prologue store does not kill the register.
+ if (Reg == AArch64::X30) {
+ if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+ && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+ return false;
+ }
+
+ // In all other cases, physical registers are dead after they've been saved
+ // but live at the beginning of the prologue block.
+ MBB.addLiveIn(Reg);
+ return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossClasses[],
+ unsigned NumClasses) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // A certain amount of implicit contract is present here. The actual stack
+ // offsets haven't been allocated officially yet, so for strictly correct code
+ // we rely on the fact that the elements of CSI are allocated in order
+ // starting at SP, purely as dictated by size and alignment. In practice,
+ // since this function handles the only accesses to those slots, it's not
+ // quite so important.
+ //
+ // We have also ordered the Callee-saved register list in AArch64CallingConv
+ // so that the above scheme puts registers in order: in particular we want
+ // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
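+ // For example, with X30 and X29 adjacent in CSI, the prologue can form the
+ // frame record with a single "stp x29, x30, [sp, #offset]".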
+ for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // First we need to find out which register class the register belongs to so
+ // that we can use the correct load/store instructions.
+ unsigned ClassIdx;
+ for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+ if (PossClasses[ClassIdx].RegClass->contains(Reg))
+ break;
+ }
+ assert(ClassIdx != NumClasses
+ && "Asked to store register in unexpected class");
+ const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+ // Now we need to decide whether it's possible to emit a paired instruction:
+ // for this we want the next register to be in the same class.
+ MachineInstrBuilder NewMI;
+ bool Pair = false;
+ if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+ Pair = true;
+ unsigned StLow = 0, StHigh = 0;
+ if (isPrologue) {
+ // Most of these registers will be live-in to the MBB and killed by our
+ // store, though there are exceptions (see determinePrologueDeath).
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+ StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ StLow = RegState::Define;
+ StHigh = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+ .addReg(CSI[i+1].getReg(), StLow)
+ .addReg(CSI[i].getReg(), StHigh);
+
+ // If it's a paired op, we've consumed two registers
+ ++i;
+ } else {
+ unsigned State;
+ if (isPrologue) {
+ State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ State = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL,
+ TII.get(PossClasses[ClassIdx].SingleOpcode))
+ .addReg(CSI[i].getReg(), State);
+ }
+
+ // Note that the FrameIdx refers to the second register in a pair: it will
+ // be allocated the smaller numeric address and so is the one an LDP/STP
+ // address must use.
+ int FrameIdx = CSI[i].getFrameIdx();
+ MachineMemOperand::MemOperandFlags Flags;
+ Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ Flags,
+ Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+ MFI.getObjectAlignment(FrameIdx));
+
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0) // address-register offset
+ .addMemOperand(MMO);
+
+ if (isPrologue)
+ NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+ // For aesthetic reasons, during an epilogue we want to emit complementary
+ // operations to the prologue, but in the opposite order. So we still
+ // iterate through the CalleeSavedInfo list in order, but we put the
+ // instructions successively earlier in the MBB.
+ if (!isPrologue)
+ --MBBI;
+ }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // This is a decision of ABI compliance. The AArch64 PCS gives various options
+ // for conformance, and even at the most stringent level more or less permits
+ // elimination for leaf functions because there's no loss of functionality
+ // (for debugging etc.).
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
+ return true;
+
+ // The following are hard-limits: incorrect code will be generated if we try
+ // to omit the frame.
+ return (RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool
+AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Of the various reasons for having a frame pointer, it's actually only
+ // variable-sized objects that prevent reservation of a call frame.
+ return !(hasFP(MF) && MFI->hasVarSizedObjects());
+}
+
+void
+AArch64FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MI->getDebugLoc();
+ int Opcode = MI->getOpcode();
+ bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
+
+ if (!hasReservedCallFrame(MF)) {
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = MI->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy) Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 12-bits
+ // because there's no guaranteed temporary register available. Mostly call
+ // frames will be allocated at the start of a function so this is OK, but
+ // it is a limitation that needs dealing with.
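+ // For example, a single "sub sp, sp, #imm" encodes at most #0xfff, which
+ // is what the assertion below enforces.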
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+ }
+
+ MBB.erase(MI);
+}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
new file mode 100644
index 000000000000..45ea0ec8e071
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -0,0 +1,108 @@
+//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the AArch64-specific parts of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_FRAMEINFO_H
+#define LLVM_AARCH64_FRAMEINFO_H
+
+#include "AArch64Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class AArch64Subtarget;
+
+class AArch64FrameLowering : public TargetFrameLowering {
+private:
+ // In order to unify the spilling and restoring of callee-saved registers into
+ // emitFrameMemOps, we need to be able to specify which instructions to use
+ // for the relevant memory operations on each register class. An array of the
+ // following struct is populated and passed in to achieve this.
+ struct LoadStoreMethod {
+ const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
+
+ // The preferred instruction.
+ unsigned PairOpcode; // E.g. LSPair64_STR
+
+ // Sometimes only a single register can be handled at once.
+ unsigned SingleOpcode; // E.g. LS64_STR
+ };
+protected:
+ const AArch64Subtarget &STI;
+
+public:
+ explicit AArch64FrameLowering(const AArch64Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Decides how much stack adjustment to perform in each phase of the prologue
+ /// and epilogue.
+ void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+ uint64_t &Residual) const;
+
+ int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+ unsigned &FrameReg, int SPAdj,
+ bool IsCalleeSaveOp) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// If the register is X30 (i.e. LR) and the return address is used in the
+ /// function then the callee-save store doesn't actually kill the register,
+ /// otherwise it does.
+ bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+ /// This function emits the loads or stores required during prologue and
+ /// epilogue as efficiently as possible.
+ ///
+ /// The operations involved in setting up and tearing down the frame are
+ /// similar enough to warrant a shared function, particularly as discrepancies
+ /// between the two would be disastrous.
+ void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossibleClasses[],
+ unsigned NumClasses) const;
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+  /// On AArch64 the call frame can usually be reserved: the space for
+  /// outgoing call arguments is folded into the function's main stack
+  /// adjustment, so no SP updates are needed around individual calls.
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..46b822152a00
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+ AArch64TargetMachine &TM;
+ const AArch64InstrInfo *TII;
+
+ /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+public:
+ explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "AArch64 Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
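+  // Match constants usable as a scaled unsigned 12-bit offset: e.g. for
+  // MemSize == 8, byte offsets 0, 8, ..., 32760 encode as UImm12 0..4095.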
+ template<unsigned MemSize>
+ bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN || CN->getZExtValue() % MemSize != 0
+ || CN->getZExtValue() / MemSize > 0xfff)
+ return false;
+
+ UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
+ return true;
+ }
+
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
+
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ bool SelectLogicalImm(SDValue N, SDValue &Imm);
+
+ template<unsigned RegWidth>
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
+ return SelectTSTBOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ SDNode *TrySelectToMoveImm(SDNode *N);
+ SDNode *LowerToFPLitPool(SDNode *Node);
+ SDNode *SelectToLitPool(SDNode *N);
+
+ SDNode* Select(SDNode*);
+private:
+};
+}
+
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (!CN) return false;
+
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
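+  // For example, matching (fp_to_sint (fmul Val, 16.0)) for a w-register
+  // gives FBits == 4, i.e. an FCVTZS with 4 fractional bits.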
+ bool IsExact;
+
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ return true;
+}
+
+bool
+AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ switch (ConstraintCode) {
+ default: llvm_unreachable("Unrecognised AArch64 memory constraint");
+ case 'm':
+ // FIXME: more freedom is actually permitted for 'm'. We can go
+ // hunting for a base and an offset if we want. Of course, since
+ // we don't really know how the operand is going to be used we're
+ // probably restricted to the load/store pair's simm7 as an offset
+ // range anyway.
+ case 'Q':
+ OutOps.push_back(Op);
+ }
+
+ return false;
+}
+
+bool
+AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
+ ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
+ if (!Imm || !Imm->getValueAPF().isPosZero())
+ return false;
+
+ // Doesn't actually carry any information, but keeps TableGen quiet.
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
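+// Match constants encodable as an A64 "logical immediate": a rotated,
+// replicated run of contiguous ones, e.g. 0x00ff00ff00ff00ff is valid but
+// 0x12345678 is not.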
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+ uint32_t Bits;
+ uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+ SDNode *ResNode;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT DestType = Node->getValueType(0);
+ unsigned DestWidth = DestType.getSizeInBits();
+
+ unsigned MOVOpcode;
+ EVT MOVType;
+ int UImm16, Shift;
+ uint32_t LogicalBits;
+
+ uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+ } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+ } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
+    // can use a 32-bit instruction: "movn w0, 0xedcb" (the 32-bit NOT of
+    // 0xffff1234), since writing a w-register zeroes the upper 32 bits.
+ MOVType = MVT::i32;
+ MOVOpcode = AArch64::MOVNwii;
+ } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+ MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+ uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+ return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+ CurDAG->getRegister(ZR, DestType),
+ CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+ } else {
+ // Can't handle it in one instruction. There's scope for permitting two (or
+ // more) instructions, but that'll need more thought.
+ return NULL;
+ }
+
+ ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+ CurDAG->getTargetConstant(UImm16, MVT::i32),
+ CurDAG->getTargetConstant(Shift, MVT::i32));
+
+ if (MOVType != DestType) {
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+ int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+ EVT DestType = Node->getValueType(0);
+ EVT PtrVT = TLI.getPointerTy();
+
+ // Since we may end up loading a 64-bit constant from a 32-bit entry the
+ // constant in the pool may have a different type to the eventual node.
+ ISD::LoadExtType Extension;
+ EVT MemType;
+
+ assert((DestType == MVT::i64 || DestType == MVT::i32)
+ && "Only expect integer constants at the moment");
+
+ if (DestType == MVT::i32) {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i32;
+ } else if (UnsignedVal <= UINT32_MAX) {
+ Extension = ISD::ZEXTLOAD;
+ MemType = MVT::i32;
+ } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
+ Extension = ISD::SEXTLOAD;
+ MemType = MVT::i32;
+ } else {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i64;
+ }
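+  // For example, 0x00000000deadbeef can be zero-extended from a 32-bit entry
+  // and 0xffffffffdeadbeef sign-extended from one, while 0xdeadbeef00000000
+  // needs a full 64-bit entry.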
+
+ Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
+ MemType.getSizeInBits()),
+ UnsignedVal);
+ SDValue PoolAddr;
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
+ PoolAddr,
+ MachinePointerInfo::getConstantPool(), MemType,
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ Alignment).getNode();
+}
+
+SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT DestType = Node->getValueType(0);
+
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
+ SDValue PoolAddr;
+
+ assert(TM.getCodeModel() == CodeModel::Small &&
+ "Only small code model supported");
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
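+  // Under the small code model the two constant-pool operands above become
+  // the ADRP page and :lo12: offset halves of the entry's address.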
+
+ return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ /* isInvariant = */ true,
+ Alignment).getNode();
+}
+
+bool
+AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ uint64_t Val = CN->getZExtValue();
+
+ if (!isPowerOf2_64(Val)) return false;
+
+ unsigned TestedBit = Log2_64(Val);
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding TestedBit, but it must still be in range.
+ if (TestedBit >= RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ return NULL;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
+ TFI, CurDAG->getTargetConstant(0, PtrTy));
+ }
+ case ISD::ConstantPool: {
+ // Constant pools are fine, just create a Target entry.
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+ const Constant *C = CN->getConstVal();
+ SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+ ReplaceUses(SDValue(Node, 0), CP);
+ return NULL;
+ }
+ case ISD::Constant: {
+ SDNode *ResNode = 0;
+ if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+ // XZR and WZR are probably even better than an actual move: most of the
+ // time they can be folded into another instruction with *no* cost.
+
+ EVT Ty = Node->getValueType(0);
+ assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+ uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+ ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Node->getDebugLoc(),
+ Register, Ty).getNode();
+ }
+
+ // Next best option is a move-immediate, see if we can do that.
+ if (!ResNode) {
+ ResNode = TrySelectToMoveImm(Node);
+ }
+
+ if (ResNode)
+ return ResNode;
+
+ // If even that fails we fall back to a lit-pool entry at the moment. Future
+ // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
+ ResNode = SelectToLitPool(Node);
+ assert(ResNode && "We need *some* way to materialise a constant");
+
+    // We want to continue selection at this point since the litpool access
+    // we just generated uses generic nodes for simplicity.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ Node = ResNode;
+ break;
+ }
+ case ISD::ConstantFP: {
+ if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+ // FMOV will take care of it from TableGen
+ break;
+ }
+
+ SDNode *ResNode = LowerToFPLitPool(Node);
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+
+    // We want to continue selection at this point since the litpool access
+    // we just generated uses generic nodes for simplicity.
+ Node = ResNode;
+ break;
+ }
+ default:
+ break; // Let generic code handle it
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << "\n");
+
+ return ResNode;
+}
+
+/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 000000000000..e9f449709c40
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2975 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+
+using namespace llvm;
+
+static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+ if (Subtarget->isTargetLinux())
+ return new AArch64LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ RegInfo(TM.getRegisterInfo()),
+ Itins(TM.getInstrItineraryData()) {
+
+ // SIMD compares set the entire lane's bits to 1
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Scalar register <-> type mapping
+ addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ computeRegisterProperties();
+
+ // Some atomic operations can be folded into load-acquire or store-release
+ // instructions on AArch64. It's marginally simpler to let LLVM expand
+ // everything out to a barrier and then recombine the (few) barriers we can.
+ setInsertFencesForAtomic(true);
+ setTargetDAGCombine(ISD::ATOMIC_FENCE);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+
+ // We combine OR nodes for bitfield and NEON BSL operations.
+ setTargetDAGCombine(ISD::OR);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRA);
+
+ // AArch64 does not have i1 loads, or much of anything for i1 really.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setStackPointerRegisterToSaveRestore(AArch64::XSP);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+
+ // We'll lower globals to wrappers for selection.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ // A64 instructions have the comparison predicate attached to the user of the
+ // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Legal floating-point operations.
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
+
+ // Illegal floating-point operations.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+
+  // Virtually no operation on f128 is legal, but LLVM can't expand these
+  // operations when there's a valid register class, so we need custom
+  // lowering in most cases.
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+
+ // Lowering for many of the conversions is actually specified by the non-f128
+ // type. The LowerXXX function will be trivial when f128 isn't involved.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+
+ // This prevents LLVM trying to compress double constants into a floating
+ // constant-pool entry and trying to load from there. It's of doubtful benefit
+ // for A64: we'd need LDR followed by FCVT, I believe.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setExceptionPointerRegister(AArch64::X0);
+ setExceptionSelectorRegister(AArch64::X1);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+ // It's reasonably important that this value matches the "natural" legal
+ // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
+ // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
+ if (!VT.isVector()) return MVT::i32;
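+  // For vectors the element type is simply integerised, e.g. a setcc on
+  // v2f64 operands yields a v2i64 mask.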
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
+ unsigned &strOpc) {
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for atomic binary op!");
+ case 1:
+ ldrOpc = AArch64::LDXR_byte;
+ strOpc = AArch64::STXR_byte;
+ break;
+ case 2:
+ ldrOpc = AArch64::LDXR_hword;
+ strOpc = AArch64::STXR_hword;
+ break;
+ case 4:
+ ldrOpc = AArch64::LDXR_word;
+ strOpc = AArch64::STXR_word;
+ break;
+ case 8:
+ ldrOpc = AArch64::LDXR_dword;
+ strOpc = AArch64::STXR_dword;
+ break;
+ }
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
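+  // For a swap there is no arithmetic to perform: the loaded value is
+  // replaced wholesale, so incr doubles as the scratch register below.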
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC
+ = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // <binop> scratch, dest, incr
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (BinOpcode) {
+ // All arithmetic operations we'll be creating are designed to take an extra
+ // shift or extend operand, which we can conveniently set to zero.
+
+ // Operand order needs to go the other way for NAND.
+ if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(incr).addReg(dest).addImm(0);
+ else
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(dest).addReg(incr).addImm(0);
+ }
+
+ // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned CmpOp,
+ A64CC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRC, *TRCsp;
+ if (Size == 8) {
+ TRC = &AArch64::GPR64RegClass;
+ TRCsp = &AArch64::GPR64xspRegClass;
+ } else {
+ TRC = &AArch64::GPR32RegClass;
+ TRCsp = &AArch64::GPR32wspRegClass;
+ }
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ MRI.constrainRegClass(scratch, TRCsp);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // cmp incr, dest (, sign extend if necessary)
+ // csel scratch, dest, incr, cond
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ // Build compare and cmov instructions.
+ MRI.constrainRegClass(incr, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(incr).addReg(oldval).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
+ scratch)
+ .addReg(oldval).addReg(incr).addImm(Cond);
+
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status)
+ .addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRCsp;
+ TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldxr dest, [ptr]
+ // cmp dest, oldval
+ // b.ne exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+ MRI.constrainRegClass(dest, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(dest).addReg(oldval).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(exitMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex stxr_status, newval, [ptr]
+ // cbnz stxr_status, loop1MBB
+ BB = loop2MBB;
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // We materialise the F128CSEL pseudo-instruction using conditional branches
+  // and loads, giving an instruction sequence like:
+ // str q0, [sp]
+ // b.ne IfTrue
+ // b Finish
+ // IfTrue:
+ // str q1, [sp]
+ // Finish:
+ // ldr q0, [sp]
+ //
+ // Using virtual registers would probably not be beneficial since COPY
+ // instructions are expensive for f128 (there's no actual instruction to
+ // implement them).
+ //
+ // An alternative would be to do an integer-CSEL on some address. E.g.:
+ // mov x0, sp
+ // add x1, sp, #16
+ // str q0, [x0]
+ // str q1, [x1]
+ // csel x0, x0, x1, ne
+ // ldr q0, [x0]
+ //
+ // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned IfTrueReg = MI->getOperand(1).getReg();
+ unsigned IfFalseReg = MI->getOperand(2).getReg();
+ unsigned CondCode = MI->getOperand(3).getImm();
+ bool NZCVKilled = MI->getOperand(4).isKill();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBB);
+ MF->insert(It, EndBB);
+
+ // Transfer rest of current basic-block to EndBB
+ EndBB->splice(EndBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // We need somewhere to store the f128 value needed.
+ int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
+
+ // [... start of incoming MBB ...]
+ // str qIFFALSE, [sp]
+ // b.cc IfTrue
+ // b Done
+ BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfFalseReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc))
+ .addImm(CondCode)
+ .addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bimm))
+ .addMBB(EndBB);
+ MBB->addSuccessor(TrueBB);
+ MBB->addSuccessor(EndBB);
+
+ // IfTrue:
+ // str qIFTRUE, [sp]
+ BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfTrueReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
+ // blocks.
+ TrueBB->addSuccessor(EndBB);
+
+ // Done:
+ // ldr qDEST, [sp]
+ // [... rest of incoming MBB ...]
+ if (!NZCVKilled)
+ EndBB->addLiveIn(AArch64::NZCV);
+ MachineInstr *StartOfEnd = EndBB->begin();
+ BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ MI->eraseFromParent();
+ return EndBB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction with custom inserter");
+ case AArch64::F128CSEL:
+ return EmitF128CSEL(MI, MBB);
+ case AArch64::ATOMIC_LOAD_ADD_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_SUB_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_AND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_OR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_XOR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_NAND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
+
+ case AArch64::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
+
+ case AArch64::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
+
+ case AArch64::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
+
+ case AArch64::ATOMIC_SWAP_I8:
+ return emitAtomicBinary(MI, MBB, 1, 0);
+ case AArch64::ATOMIC_SWAP_I16:
+ return emitAtomicBinary(MI, MBB, 2, 0);
+ case AArch64::ATOMIC_SWAP_I32:
+ return emitAtomicBinary(MI, MBB, 4, 0);
+ case AArch64::ATOMIC_SWAP_I64:
+ return emitAtomicBinary(MI, MBB, 8, 0);
+
+ case AArch64::ATOMIC_CMP_SWAP_I8:
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ case AArch64::ATOMIC_CMP_SWAP_I16:
+ return emitAtomicCmpSwap(MI, MBB, 2);
+ case AArch64::ATOMIC_CMP_SWAP_I32:
+ return emitAtomicCmpSwap(MI, MBB, 4);
+ case AArch64::ATOMIC_CMP_SWAP_I64:
+ return emitAtomicCmpSwap(MI, MBB, 8);
+ }
+}
+
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
+ case AArch64ISD::Call: return "AArch64ISD::Call";
+ case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
+ case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
+ case AArch64ISD::BFI: return "AArch64ISD::BFI";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::Ret: return "AArch64ISD::Ret";
+ case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
+ case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
+ case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
+
+ default: return NULL;
+ }
+}
+
+static const uint16_t AArch64FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
+};
+static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+
+static const uint16_t AArch64ArgRegs[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
+};
+static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+
+static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // Mark all remaining general purpose registers as allocated. We don't
+ // backtrack: if (for example) an i128 gets put on the stack, no subsequent
+ // i64 will go in registers (C.11).
+ for (unsigned i = 0; i < NumArgRegs; ++i)
+ State.AllocateReg(AArch64ArgRegs[i]);
+
+ return false;
+}
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+
+ switch(CC) {
+ default: llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return CC_A64_APCS;
+ }
+}
+
+void
+AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ SmallVector<SDValue, 8> MemOps;
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
+ NumArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
+ NumFPRArgRegs);
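+  // For example, in a function with three named integer arguments
+  // FirstVariadicGPR == 3, so x3-x7 (40 bytes) must be dumped to the save
+  // area for later retrieval via va_arg.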
+
+ unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ }
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+
+ int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
+
+ FuncInfo->setVariadicStackIdx(StackIdx);
+ FuncInfo->setVariadicGPRIdx(GPRIdx);
+ FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
+
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+ }
+}
+
+
+SDValue
+AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+      // Byval is used for small structs and HFAs in the PCS, but larger
+      // structs are still handled here, just not in a PCS-compliant manner.
+ EVT PtrTy = getPointerTy();
+ int Size = Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
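+      // E.g. a 12-byte struct rounds up to two 8-byte slots.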
+
+ unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
+ VA.getLocMemOffset(),
+ false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
+
+ continue;
+ } else if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ } else { // VA.isRegLoc()
+ assert(VA.isMemLoc());
+
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+    }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned DestSize = VA.getValVT().getSizeInBits();
+ unsigned DestSubReg;
+
+ switch (DestSize) {
+ case 8: DestSubReg = AArch64::sub_8; break;
+ case 16: DestSubReg = AArch64::sub_16; break;
+ case 32: DestSubReg = AArch64::sub_32; break;
+ case 64: DestSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ VA.getValVT(), ArgValue,
+ DAG.getTargetConstant(DestSubReg, MVT::i32)),
+ 0);
+ break;
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (isVarArg)
+ SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
+ }
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
+
+ return Chain;
+}
+
+SDValue
+AArch64TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ // PCS: "If the type, T, of the result of a function is such that
+ // void func(T arg) would require that arg be passed as a value in a
+ // register (or set of registers) according to the rules in 5.4, then the
+ // result is returned in the same registers as would be used for such an
+ // argument.
+ //
+ // Otherwise, the caller shall reserve a block of memory of sufficient
+ // size and alignment to hold the result. The address of the memory block
+ // shall be passed as an additional argument to the function in x8."
+ //
+ // This is implemented in two places. The register-return values are dealt
+ // with here, more complex returns are passed as an sret parameter, which
+ // means we don't have to worry about it during actual return.
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+
+ SDValue Arg = OutVals[i];
+
+ // There's no convenient note in the ABI about this as there is for normal
+ // arguments, but it says return values are passed in the same registers as
+ // an argument would be. I believe that includes the comments about
+ // unspecified higher bits, putting the burden of widening on the *caller*
+ // for return values.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt:
+ // Floating-point values should only be extended when they're going into
+ // memory, which can't happen here so an integer extend is acceptable.
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+
+ if (IsTailCall) {
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // On AArch64 (and all other architectures I'm aware of) the most this has to
+ // do is adjust the stack pointer.
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
+ }
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
+ int FPDiff = 0;
+
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
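+    // E.g. 32 reusable bytes and a 16-byte outgoing area gives FPDiff == 16,
+    // so the stack can shrink by 16 bytes for the tail call.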
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
+ getPointerTy());
+
+ SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[i];
+
+ // Callee does the actual widening, so all extensions just use an implicit
+ // definition of the rest of the Loc. Aesthetically, this would be nicer as
+ // an ANY_EXTEND, but that isn't valid for floating-point types and this
+ // alternative works on integer types too.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned SrcSize = VA.getValVT().getSizeInBits();
+ unsigned SrcSubReg;
+
+ switch (SrcSize) {
+ case 8: SrcSubReg = AArch64::sub_8; break;
+ case 16: SrcSubReg = AArch64::sub_16; break;
+ case 32: SrcSubReg = AArch64::sub_32; break;
+ case 64: SrcSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ VA.getLocVT(),
+ DAG.getUNDEF(VA.getLocVT()),
+ Arg,
+ DAG.getTargetConstant(SrcSubReg, MVT::i32)),
+ 0);
+
+ break;
+ }
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // A normal register (sub-) argument. For now we just note it down because
+ // we want to copy things into registers as late as possible to avoid
+ // register-pressure (and possibly worse).
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc() && "unexpected argument location");
+
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ if (IsTailCall) {
+ uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
+ VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ int32_t Offset = VA.getLocMemOffset() + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing are
+ // loaded before this eventual operation. Otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+
+ DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
+ }
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false,
+ DstInfo, MachinePointerInfo(0));
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Normal stack argument, put it where it's needed.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
+ false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+
+ // The loads and stores generated above shouldn't clash with each
+ // other. Combining them with this TokenFactor notes that fact for the rest of
+ // the backend.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Most of the rest of the instructions need to be glued together; we don't
+ // want assignments to actual registers used by a call to be rearranged by a
+ // well-meaning scheduler.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // The linker is responsible for inserting veneers when necessary to put a
+ // function call destination in range, so we don't need to bother with a
+ // wrapper here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // We don't usually want to end the call-sequence here because we would tidy
+ // the frame up *after* the call. However, in the ABI-changing tail-call case
+ // we've carefully laid out the parameters so that when SP is reset they'll be
+ // in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // We produce the following DAG scheme for the actual call instruction:
+ // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
+ //
+ // Most operands aren't going to be used by the instruction itself; they just
+ // keep the values live as far as LLVM is concerned. The node is expected to
+ // be selected as simply "bl callee" (for a direct, non-tail call).
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ }
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers. This
+ // is used later in codegen to constrain register-allocation.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // If we needed glue, put it in as the last argument.
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Now we can reclaim the stack; we may as well do it before working out
+ // where our return value is.
+ if (!IsSibCall) {
+ uint64_t CalleePopBytes
+ = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return LowerCallResult(Chain, InFlag, CallConv,
+ IsVarArg, Ins, dl, DAG, InVals);
+}
+
+SDValue
+AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Return values that are too big to fit into registers should use an sret
+ // pointer, so this can be a lot simpler than the main argument code.
+ assert(VA.isRegLoc() && "Memory locations not expected for call return");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::SExt:
+ case CCValAssign::AExt:
+ // Floating-point arguments only get extended/truncated if they're going
+ // into memory, so using the integer operation is acceptable here.
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+bool
+AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+
+ // For CallingConv::C this function knows whether the ABI needs changing.
+ // That's not true for other conventions, so they will have to opt in
+ // manually.
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86) but less efficient and uglier in LowerCall.
+ for (Function::const_arg_iterator i = CallerF->arg_begin(),
+ e = CallerF->arg_end(); i != e; ++i)
+ if (i->hasByValAttr())
+ return false;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Now we search for cases where we can use a tail call without changing the
+ // ABI. Sibcall is used in some places (particularly gcc) to refer to this
+ // concept.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!IsVarArg || CalleeCC == CallingConv::C)
+ && "Unexpected variadic calling convention");
+
+ if (IsVarArg && !Outs.empty()) {
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // Nothing more to check if the callee is taking no arguments.
+ if (Outs.empty())
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ const AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ // If the stack arguments for this call would fit into our own save area then
+ // the call can be made tail.
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+}
+
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
+}
+
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
+}
+
+SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+ SelectionDAG &DAG,
+ MachineFrameInfo *MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each incoming-argument load that overlaps the
+ // object being clobbered.
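+ //
+ // For example, if the store clobbers bytes [8, 15] of the incoming argument
+ // area, a load covering bytes [12, 19] overlaps and its chain is included,
+ // while one covering [16, 23] is unaffected.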
+ for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
+ UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ: return A64CC::EQ;
+ case ISD::SETGT: return A64CC::GT;
+ case ISD::SETGE: return A64CC::GE;
+ case ISD::SETLT: return A64CC::LT;
+ case ISD::SETLE: return A64CC::LE;
+ case ISD::SETNE: return A64CC::NE;
+ case ISD::SETUGT: return A64CC::HI;
+ case ISD::SETUGE: return A64CC::HS;
+ case ISD::SETULT: return A64CC::LO;
+ case ISD::SETULE: return A64CC::LS;
+ default: llvm_unreachable("Unexpected condition code");
+ }
+}
+
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
+ // icmp is implemented using adds/subs immediate, which take an unsigned
+ // 12-bit immediate, optionally shifted left by 12 bits.
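+ //
+ // For example, 4095 (0xfff) and 0x7ff000 are legal immediates, but
+ // 4097 (0x1001) is not: it needs set bits both below and above bit 12.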
+
+ // Symmetric by using adds/subs
+ if (Val < 0)
+ Val = -Val;
+
+ return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
+}
+
+SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &A64cc,
+ SelectionDAG &DAG, DebugLoc &dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ int64_t C = 0;
+ EVT VT = RHSC->getValueType(0);
+ bool knownInvalid = false;
+
+ // I'm not convinced the rest of LLVM handles these edge cases properly, but
+ // we can at least get it right.
+ if (isSignedIntSetCC(CC)) {
+ C = RHSC->getSExtValue();
+ } else if (RHSC->getZExtValue() > INT64_MAX) {
+ // A 64-bit constant not representable by a signed 64-bit integer is far
+ // too big to fit into a SUBS immediate anyway.
+ knownInvalid = true;
+ } else {
+ C = RHSC->getZExtValue();
+ }
+
+ if (!knownInvalid && !isLegalICmpImmediate(C)) {
+      // The constant doesn't fit; try adjusting it by one and inverting the
+      // strictness of the comparison instead.
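+      //
+      // For example, "x s< 0x1001" has an illegal immediate but is equivalent
+      // to "x s<= 0x1000", whose immediate fits the shifted-by-12 form.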
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ }
+ }
+ }
+
+ A64CC::CondCodes CondCode = IntCCToA64CC(CC);
+ A64cc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+}
+
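+/// Map an LLVM floating-point condition code onto an A64 condition code and,
+/// for the cases that can't be expressed as one test, an alternative condition
+/// where either passing suffices. For example SETONE (ordered and not equal)
+/// becomes MI with alternative GT: "a < b" and "a > b" each imply the operands
+/// were ordered and unequal.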
+static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
+ A64CC::CondCodes &Alternative) {
+ A64CC::CondCodes CondCode = A64CC::Invalid;
+ Alternative = A64CC::Invalid;
+
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = A64CC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = A64CC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = A64CC::GE; break;
+ case ISD::SETOLT: CondCode = A64CC::MI; break;
+ case ISD::SETOLE: CondCode = A64CC::LS; break;
+ case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
+ case ISD::SETO: CondCode = A64CC::VC; break;
+ case ISD::SETUO: CondCode = A64CC::VS; break;
+ case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
+ case ISD::SETUGT: CondCode = A64CC::HI; break;
+ case ISD::SETUGE: CondCode = A64CC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = A64CC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = A64CC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = A64CC::NE; break;
+ }
+ return CondCode;
+}
+
+SDValue
+AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "Only small code model supported at the moment");
+
+ // The most efficient code is PC-relative anyway for the small memory model,
+ // so we don't need to worry about the relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+}
+
+// (BRCOND chain, val, dest)
+SDValue
+AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ SDValue TheBit = Op.getOperand(1);
+ SDValue DestBB = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
+ A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
+ DestBB);
+}
+
+// (BR_CC chain, condcode, lhs, rhs, dest)
+SDValue
+AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue DestBB = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to runtime calls by a routine which sets
+ // LHS, RHS and CC appropriately for the rest of this function to continue.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, CmpOp, A64cc, DestBB);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, SetCC, A64cc, DestBB);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ A64BR_CC, SetCC, A64cc, DestBB);
+ }
+
+ return A64BR_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ ArgListTy Args;
+ ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check that it's in the right position.
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
+ 0, getLibcallCallingConv(Call), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, Op.getDebugLoc());
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make that distinction here.
+
+ // We support the small memory model for now.
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+ // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+ // to zero when they remain undefined. In PIC mode the GOT can take care of
+ // this, but in absolute mode we use a constant pool load.
+ SDValue PoolAddr;
+ PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(8, MVT::i32));
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
+ }
+
+ if (Alignment == 0) {
+ const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
+ if (GVPtrTy->getElementType()->isSized()) {
+ Alignment
+ = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
+ } else {
+ // Be conservative if we can't guess, not that it really matters:
+ // functions and labels aren't valid for loads, and the methods used to
+ // actually calculate an address work with any alignment.
+ Alignment = 1;
+ }
+ }
+
+ unsigned char HiFixup, LoFixup;
+ bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+
+ if (UseGOT) {
+ HiFixup = AArch64II::MO_GOT;
+ LoFixup = AArch64II::MO_GOT_LO12;
+ Alignment = 8;
+ } else {
+ HiFixup = AArch64II::MO_NO_FLAG;
+ LoFixup = AArch64II::MO_LO12;
+ }
+
+ // AArch64's small model demands the following sequence:
+ // ADRP x0, somewhere
+ // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ HiFixup),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ LoFixup),
+ DAG.getConstant(Alignment, MVT::i32));
+
+ if (UseGOT) {
+ GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
+ GlobalRef);
+ }
+
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalRef;
+}
+
+SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr,
+ DebugLoc DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+
+ // The function we need to call is simply the first entry in the GOT for this
+ // descriptor; load it in preparation.
+ SDValue Func, Chain;
+ Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ DescAddr);
+
+ // The function takes only one argument: the address of the descriptor itself
+ // in X0.
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
+ Glue = Chain.getValue(1);
+
+ // Finally, there's a special calling convention which requires the lookup to
+ // preserve all registers (except X0, obviously).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *A64RI
+ = static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
+
+ // We're now ready to populate the argument list, as with a normal call:
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Func);
+ Ops.push_back(SymAddr);
+ Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
+ Ops.size());
+ Glue = Chain.getValue(1);
+
+ // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
+ // back to the generic handling code.
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ SDValue TPOff;
+ EVT PtrVT = getPointerTy();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+
+ SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+
+ if (Model == TLSModel::InitialExec) {
+ TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL_LO12),
+ DAG.getConstant(8, MVT::i32));
+ TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ TPOff);
+ } else if (Model == TLSModel::LocalExec) {
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
+
+ TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ } else if (Model == TLSModel::LocalDynamic) {
+ // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
+ // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
+ // the beginning of the module's TLS region, followed by a DTPREL offset
+ // calculation.
+
+ // These accesses will need deduplicating if there's more than one.
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<AArch64MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Get the location of _TLS_MODULE_BASE_:
+ SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
+
+ ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Get the variable's offset from _TLS_MODULE_BASE_
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else
+ llvm_unreachable("Unsupported TLS access model");
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+}
+
+SDValue
+AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getValueType() != MVT::f128) {
+ // Legal for everything except f128.
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ DebugLoc dl = JT->getDebugLoc();
+
+ // When compiling for PIC, jump tables are placed in the code section, so a
+ // static relocation style is acceptable in both cases.
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue IfTrue = Op.getOperand(2);
+ SDValue IfFalse = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to libcalls, but slot in nicely here
+ // afterwards.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ CmpOp, IfTrue, IfFalse, A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
+ Op.getValueType(),
+ SetCC, IfTrue, IfFalse, A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+// (SELECT testbit, iftrue, iffalse)
+SDValue
+AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue TheBit = Op.getOperand(0);
+ SDValue IfTrue = Op.getOperand(1);
+ SDValue IfFalse = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ A64CMP, IfTrue, IfFalse,
+ DAG.getConstant(A64CC::NE, MVT::i32));
+}
+
+// (SETCC lhs, rhs, condcode)
+SDValue
+AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
+ // for the rest of the function (some i32 or i64 values).
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (RHS.getNode() == 0) {
+ assert(LHS.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return LHS;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT),
+ DAG.getConstant(0, VT), A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+ DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+ // rather than just 8.
+ return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(32, MVT::i32), 8, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section B.3.
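+  //
+  // Concretely, the structure we initialise here is equivalent to:
+  //   struct va_list {
+  //     void *__stack;   // next stacked argument         (offset 0)
+  //     void *__gr_top;  // end of GP register save area  (offset 8)
+  //     void *__vr_top;  // end of FP/SIMD save area      (offset 16)
+  //     int __gr_offs;   // negative offset from __gr_top (offset 24)
+  //     int __vr_offs;   // negative offset from __vr_top (offset 28)
+  //   };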
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAList = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SmallVector<SDValue, 4> MemOps;
+
+ // void *__stack at offset 0
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+ getPointerTy());
+ MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+ MachinePointerInfo(SV), false, false, 0));
+
+ // void *__gr_top at offset 8
+ int GPRSize = FuncInfo->getVariadicGPRSize();
+ if (GPRSize > 0) {
+ SDValue GRTop, GRTopAddr;
+
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(8, getPointerTy()));
+
+ GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+ DAG.getConstant(GPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+ MachinePointerInfo(SV, 8),
+ false, false, 0));
+ }
+
+ // void *__vr_top at offset 16
+ int FPRSize = FuncInfo->getVariadicFPRSize();
+ if (FPRSize > 0) {
+ SDValue VRTop, VRTopAddr;
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(16, getPointerTy()));
+
+ VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+ DAG.getConstant(FPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+ MachinePointerInfo(SV, 16),
+ false, false, 0));
+ }
+
+ // int __gr_offs at offset 24
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(24, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, 24),
+ false, false, 0));
+
+ // int __vr_offs at offset 28
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(28, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, 28),
+ false, false, 0));
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+  // We're looking for an SRL/AND pair which forms a UBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (!isMask_64(TruncMask))
+ return SDValue();
+
+ uint64_t Width = CountPopulation_64(TruncMask);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+ uint64_t LSB = Shift->getConstantOperandVal(1);
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
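+  // For example (i32): (and (srl X, #8), #0xff) extracts bits [8, 15] of X,
+  // i.e. a UBFX with LSB 8 and width 8.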
+ return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // An atomic operation followed by an acquiring atomic fence can be reduced to
+ // an acquiring load. The atomic operation provides a convenient pointer to
+ // load from. If the original operation was a load anyway we can actually
+ // combine the two operations into an acquiring load.
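+  //
+  // For example, "load atomic monotonic; fence acquire" can become a single
+  // load-acquire (selected as LDAR); for other atomic operations the fence is
+  // replaced by a load-acquire from the operation's pointer whose result is
+  // discarded.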
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue AtomicOp = FenceNode->getOperand(0);
+ AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+ // A fence on its own can't be optimised
+ if (!AtomicNode)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
+
+ if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
+ // the chain we use should be its input, otherwise we'll put our store after
+ // it so we use its output chain.
+ SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+ AtomicNode->getChain() : AtomicOp;
+
+ // We have an acquire fence with a handy atomic operation nearby, we can
+ // convert the fence into a load-acquire, discarding the result.
+ DebugLoc DL = FenceNode->getDebugLoc();
+ SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+ AtomicNode->getValueType(0),
+ Chain, // Chain
+ AtomicOp.getOperand(1), // Pointer
+ AtomicNode->getMemOperand(), Acquire,
+ FenceScope);
+
+ if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+ DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+ return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // A releasing atomic fence followed by an atomic store can be combined into a
+ // single store operation.
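+  //
+  // For example, "fence release; store atomic monotonic" becomes a single
+  // release store, which can be selected as STLR.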
+ SelectionDAG &DAG = DCI.DAG;
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+ SDValue FenceOp = AtomicNode->getOperand(0);
+
+ if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
+
+ if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ DebugLoc DL = AtomicNode->getDebugLoc();
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+ FenceOp.getOperand(0), // Chain
+ AtomicNode->getOperand(1), // Pointer
+ AtomicNode->getOperand(2), // Value
+ AtomicNode->getMemOperand(), Release,
+ FenceScope);
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed
+/// from a compatible SHL operation (unless they're already low). This function
+/// checks that condition and returns the least-significant bit that's
+/// intended. If the operation is not a field preparation, -1 is returned.
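+///
+/// For example, with Mask == 0xff0 and MaskedVal == (shl X, #4), the required
+/// right-shift-by-4 cancels exactly with the existing SHL: MaskedVal is
+/// rewritten to plain X and an LSB of 4 is returned.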
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ SDValue &MaskedVal, uint64_t Mask) {
+ if (!isShiftedMask_64(Mask))
+ return -1;
+
+ // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
+ // instruction. BFI will do a left-shift by LSB before applying the mask we've
+ // spotted, so in general we should pre-emptively "undo" that by making sure
+ // the incoming bits have had a right-shift applied to them.
+ //
+ // This right shift, however, will combine with existing left/right shifts. In
+ // the simplest case of a completely straight bitfield operation, it will be
+ // expected to completely cancel out with an existing SHL. More complicated
+ // cases (e.g. bitfield to bitfield copy) may still need a real shift before
+ // the BFI.
+
+ uint64_t LSB = CountTrailingZeros_64(Mask);
+ int64_t ShiftRightRequired = LSB;
+ if (MaskedVal.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ } else if (MaskedVal.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ }
+
+ if (ShiftRightRequired > 0)
+ MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+ DAG.getConstant(ShiftRightRequired, MVT::i64));
+ else if (ShiftRightRequired < 0) {
+ // We could actually end up with a residual left shift, for example with
+ // "struc.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
+ DAG.getConstant(-ShiftRightRequired, MVT::i64));
+ }
+
+ return LSB;
+}
+
+/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
+/// a mask and an extension. Returns true if a BFI was found and provides
+/// information on its surroundings.
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+ bool &Extended) {
+ Extended = false;
+ if (N.getOpcode() == ISD::ZERO_EXTEND) {
+ Extended = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ Mask = N->getConstantOperandVal(1);
+ N = N.getOperand(0);
+ } else {
+ // Mask is the whole width.
+ Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
+ }
+
+ if (N.getOpcode() == AArch64ISD::BFI) {
+ BFI = N;
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
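+///
+/// For example (i32): (or (and X, 0xfff), (and Y, 0xfffff000)) becomes
+/// (BFI Y, X, LSB 0, Width 12), and because the two masks together cover the
+/// whole register no trailing AND is needed.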
+static SDValue tryCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+ // abandon the effort.
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t LHSMask;
+ if (isa<ConstantSDNode>(LHS.getOperand(1)))
+ LHSMask = LHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+ // is or abandon the effort.
+ SDValue RHS = N->getOperand(1);
+ if (RHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t RHSMask;
+ if (isa<ConstantSDNode>(RHS.getOperand(1)))
+ RHSMask = RHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // Can't do anything if the masks are incompatible.
+ if (LHSMask & RHSMask)
+ return SDValue();
+
+ // Now we need one of the masks to be a contiguous field. Without loss of
+ // generality that should be the RHS one.
+ SDValue Bitfield = LHS.getOperand(0);
+ if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+ // We know that LHS is a candidate new value, and RHS isn't already a better
+ // one.
+ std::swap(LHS, RHS);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // We've done our best to put the right operands in the right places, all we
+ // can do now is check whether a BFI exists.
+ Bitfield = RHS.getOperand(0);
+ int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+ if (LSB == -1)
+ return SDValue();
+
+ uint32_t Width = CountPopulation_64(RHSMask);
+ assert(Width && "Expected non-zero bitfield width");
+
+ SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ LHS.getOperand(0), Bitfield,
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(Width, MVT::i64));
+
+ // Mask is trivial
+ if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then orred into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // First job is to hunt for a MaskedBFI on either the left or right. Swap
+ // operands if it's actually on the right.
+ SDValue BFI;
+ SDValue PossExtraMask;
+ uint64_t ExistingMask = 0;
+ bool Extended = false;
+ if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(1);
+ else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(0);
+ else
+ return SDValue();
+
+ // We can only combine a BFI with another compatible mask.
+ if (PossExtraMask.getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+ // Masks must be compatible.
+ if (ExtraMask & ExistingMask)
+ return SDValue();
+
+ SDValue OldBFIVal = BFI.getOperand(0);
+ SDValue NewBFIVal = BFI.getOperand(1);
+ if (Extended) {
+ // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+ // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+ // need to be made compatible.
+ assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+ && "Invalid types for BFI");
+ OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+ NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ }
+
+ // We need the MaskedBFI to be combined with a mask of the *same* value.
+ if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return SDValue();
+
+ BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ OldBFIVal, NewBFIVal,
+ BFI.getOperand(2), BFI.getOperand(3));
+
+ // If the masking is trivial, we don't need to create it.
+ if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+ bool &FromHi) {
+ if (N.getOpcode() == ISD::SHL)
+ FromHi = false;
+ else if (N.getOpcode() == ISD::SRL)
+ FromHi = true;
+ else
+ return false;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return false;
+
+ ShiftAmount = N->getConstantOperandVal(1);
+ Src = N->getOperand(0);
+ return true;
+}
+
+/// EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
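+///
+/// For example (i32): (or (shl X, #24), (srl Y, #8)) becomes EXTR X, Y, #8:
+/// the result takes its top 8 bits from the bottom of X and its bottom 24
+/// bits from Y >> #8.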
+static SDValue tryCombineToEXTR(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS;
+ uint32_t ShiftLHS = 0;
+  bool LHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+ return SDValue();
+
+ SDValue RHS;
+ uint32_t ShiftRHS = 0;
+  bool RHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+ return SDValue();
+
+ // If they're both trying to come from the high part of the register, they're
+ // not really an EXTR.
+ if (LHSFromHi == RHSFromHi)
+ return SDValue();
+
+ if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
+ return SDValue();
+
+ if (LHSFromHi) {
+ std::swap(LHS, RHS);
+ std::swap(ShiftLHS, ShiftRHS);
+ }
+
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT,
+ LHS, RHS,
+ DAG.getConstant(ShiftRHS, MVT::i64));
+}
+
+/// Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Attempt to recognise bitfield-insert operations.
+ SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ // Attempt to combine an existing MaskedBFI operation into one with a larger
+ // mask.
+ Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ Res = tryCombineToEXTR(N, DCI);
+ if (Res.getNode())
+ return Res;
+
+ return SDValue();
+}
+
+/// Target-specific dag combine xforms for ISD::SRA
+static SDValue PerformSRACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // We're looking for an SRA/SHL pair which together form an SBFX.
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraSignBits = N->getConstantOperandVal(1);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+
+ uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
+ uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
+ uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
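+
+// Worked example: for (sra (shl X, 52), 60) with X of type i64, ExtraSignBits
+// is 60 and BitsOnLeft is 52, so Width = 64 - 60 = 4 and LSB = 64 - 4 - 52 = 8.
+// The node built is (SBFX X, #8, #11): bits [11:8] of X, sign-extended.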
+
+
+SDValue
+AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
+ case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::SRA: return PerformSRACombine(N, DCI);
+ }
+ return SDValue();
+}
+
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'w': // An FP/SIMD vector register
+ return C_RegisterClass;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return C_Other;
+ case 'Q': // A memory reference with base register and no offset
+ return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
+ }
+ }
+
+ // FIXME: Ump, Utf, Usa, Ush
+ // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
+ // whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
+ && Constraint != "Ush" && "Unimplemented constraints");
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const {
+
+ llvm_unreachable("Constraint weight unimplemented");
+}
+
+void
+AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only length 1 constraints are C_Other.
+ if (Constraint.size() != 1) return;
+
+ // Only C_Other constraints get lowered like this. That means constants for
+ // us, so return early if there's no hope the constraint can be lowered.
+
+ switch(Constraint[0]) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'Z': {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ uint64_t CVal = C->getZExtValue();
+ uint32_t Bits;
+
+ switch (Constraint[0]) {
+ default:
+ // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
+ // is a peculiarly useless SUB constraint.
+ llvm_unreachable("Unimplemented C_Other constraint");
+ case 'I':
+ if (CVal <= 0xfff)
+ break;
+ return;
+ case 'K':
+ if (A64Imms::isLogicalImm(32, CVal, Bits))
+ break;
+ return;
+ case 'L':
+ if (A64Imms::isLogicalImm(64, CVal, Bits))
+ break;
+ return;
+ case 'Z':
+ if (CVal == 0)
+ break;
+ return;
+ }
+
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA
+ = dyn_cast<BlockAddressSDNode>(Op)) {
+ Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
+ BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES
+ = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
+ case 'Y':
+ if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFP->isExactlyValue(0.0)) {
+ Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // It's an unknown constraint for us. Let generic code have a go.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
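+
+// For illustration, a C-level use such as
+//   asm("add %0, %1, %2" : "=r"(Out) : "r"(In), "I"(4095));
+// reaches this function for the 'I' operand; 4095 is accepted because it fits
+// the unsigned 12-bit ADD immediate checked above, while 4096 would not be.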
+
+std::pair<unsigned, const TargetRegisterClass*>
+AArch64TargetLowering::getRegForInlineAsmConstraint(
+ const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &AArch64::GPR32RegClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, &AArch64::GPR64RegClass);
+ break;
+ case 'w':
+ if (VT == MVT::f16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ else if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ else if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::VPR64RegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
+ else if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::VPR128RegClass);
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
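+
+// E.g. asm("fadd %s0, %s1, %s2" : "=w"(Res) : "w"(A), "w"(B)) with float
+// operands maps 'w' to FPR32 above; wider types select the FPR64/FPR128 or
+// vector classes instead. (Sketch only: the %s modifier is handled elsewhere.)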
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
new file mode 100644
index 000000000000..4960d286e9de
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -0,0 +1,247 @@
+//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // This is a conditional branch which also notes the flag needed
+ // (eq/sgt/...). A64 puts this information on the branches rather than
+ // compares as LLVM does.
+ BR_CC,
+
+ // A node to be selected to an actual call operation: either BL or BLR in
+ // the absence of tail calls.
+ Call,
+
+ // Indicates a floating-point immediate which fits into the format required
+ // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+ // value of that immediate.
+ FPMOV,
+
+ // Corresponds directly to an EXTR instruction. Operands are an LHS, an RHS
+ // and an LSB.
+ EXTR,
+
+ // Wraps a load from the GOT, which should always be performed with a 64-bit
+ // load instruction. This prevents the DAG combiner folding a truncate to
+ // form a smaller memory access.
+ GOTLoad,
+
+ // Performs a bitfield insert. Arguments are: the value being inserted into;
+ // the value being inserted; least significant bit changed; width of the
+ // field.
+ BFI,
+
+ // Simply a convenient node inserted during ISelLowering to represent
+ // procedure return. Will almost certainly be selected to "RET".
+ Ret,
+
+ /// Extracts a field of contiguous bits from the source and sign extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ SBFX,
+
+ /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+ /// main difference is that it only has the values and an A64 condition,
+ /// which will be produced by a setcc instruction.
+ SELECT_CC,
+
+ /// This serves most of the functions of the LLVM SETCC instruction, for two
+ /// purposes. First, it prevents optimisations from fiddling with the
+ /// compare after we've moved the CondCode information onto the SELECT_CC or
+ /// BR_CC instructions. Second, it gives a legal instruction for the actual
+ /// comparison.
+ ///
+ /// It keeps a record of the condition flags asked for because certain
+ /// instructions are only valid for a subset of condition codes.
+ SETCC,
+
+ // Designates a node which is a tail call: both a call and a return
+ // instruction as far as selection is concerned. It should be selected to an
+ // unconditional branch. Has the usual plethora of call operands, but: 1st
+ // is callee, 2nd is stack adjustment required immediately before branch.
+ TC_RETURN,
+
+ // Designates a call used to support the TLS descriptor ABI. The call itself
+ // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+ // var") must be attached somehow during code generation. It takes two
+ // operands: the callee and the symbol to be relocated against.
+ TLSDESCCALL,
+
+ // Leaf node which will be lowered to an appropriate MRS to obtain the
+ // thread pointer: TPIDR_EL0.
+ THREAD_POINTER,
+
+ /// Extracts a field of contiguous bits from the source and zero extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ UBFX,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small absolute memory model: i.e. adrp/add or
+ // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+ // get selected.
+ WrapperSmall
+ };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64TargetLowering : public TargetLowering {
+public:
+ explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const;
+
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
+
+ EVT getSetCCResultType(EVT VT) const;
+
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isLegalICmpImmediate(int64_t Val) const;
+ SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *
+ emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Size, unsigned Opcode) const;
+
+ MachineBasicBlock *
+ emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned CmpOp,
+ A64CC::CondCodes Cond) const;
+ MachineBasicBlock *
+ emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size) const;
+
+ MachineBasicBlock *
+ EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+ const AArch64Subtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 000000000000..cb93471058df
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,961 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file describes AArch64 instruction formats, down to the level of the
+// instruction's overall class.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// A64 Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : Instruction {
+ // All A64 instructions are 32-bit. This field will be filled in
+ // gradually going down the hierarchy.
+ field bits<32> Inst;
+
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+
+ // LLVM-level model of the AArch64/A64 distinction.
+ let Namespace = "AArch64";
+ let DecoderNamespace = "A64";
+ let Size = 4;
+
+ // Set the templated fields
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = patterns;
+ let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
+ let Namespace = "AArch64";
+
+ let OutOperandList = outs;
+ let InOperandList= ins;
+ let Pattern = patterns;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that, for whatever reason, corresponds to a
+// single A64 instruction: the eventual result will be one 32-bit real
+// instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+ : PseudoInst<outs, ins, patterns> {
+ let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+ : A64PseudoInst<outs, ins, patterns>,
+ PseudoInstExpansion<Result>;
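+
+// For illustration (all names hypothetical), a use would look like:
+//   def RetPseudo : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx X30)>;
+// i.e. the final dag operand names the real instruction the pseudo becomes.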
+
+
+// First, some common cross-hierarchy register formats.
+
+class A64InstRd<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rd;
+
+ let Inst{4-0} = Rd;
+}
+
+class A64InstRt<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt;
+
+ let Inst{4-0} = Rt;
+}
+
+
+class A64InstRdn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ // Inherit Rd in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+class A64InstRtn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ // Inherit Rt in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+
+ let Inst{14-10} = Rt2;
+}
+
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<3> Imm3;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = opt;
+ let Inst{21} = 0b1;
+ // Rm inherited in 20-16
+ let Inst{15-13} = option;
+ let Inst{12-10} = Imm3;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<12> Imm12;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = shift;
+ let Inst{21-10} = Imm12;
+}
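+
+// E.g. "add x0, x1, #42" uses this format with sf=1, op=0, S=0, shift=0b00,
+// Imm12=42, Rn=1 and Rd=0.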
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift;
+ let Inst{21} = 0b0;
+ // Rm inherited in 20-16
+ let Inst{15-10} = Imm6;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010000;
+ // Rm inherited in 20-16
+ let Inst{15-10} = opcode2;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{22} = n;
+ let Inst{21-16} = ImmR;
+ let Inst{15-10} = ImmS;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+
+ let Inst{31} = sf;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+ bits<4> Cond;
+
+ let Inst{31-25} = 0b0101010;
+ let Inst{24} = o1;
+ let Inst{23-5} = Label;
+ let Inst{4} = o0;
+ let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> UImm5;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = UImm5;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b1;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b0;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010100;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = op2;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b1;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data-processing (3 source) instructions
+
+class A64I_dp3<bit sf, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opcode{5-4};
+ let Inst{28-24} = 0b11011;
+ let Inst{23-21} = opcode{3-1};
+ // Inherits Rm in 20-16
+ let Inst{15} = opcode{0};
+ let Inst{14-10} = Ra;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = UImm16;
+ let Inst{4-2} = op2;
+ let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+class A64I_extract<bit sf, bits<3> op, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> LSB;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = op{2-1};
+ let Inst{28-23} = 0b100111;
+ let Inst{22} = n;
+ let Inst{21} = op{0};
+ // Inherits Rm in bits 20-16
+ let Inst{15-10} = LSB;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = op;
+ let Inst{13-10} = 0b1000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = op;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
+class A64I_fpcondsel<bit m, bit s, bits<2> type,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point data-processing (1 source) instructions.
+class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (2 sources) instructions.
+class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (3 sources) instructions.
+class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11111;
+ let Inst{23-22} = type;
+ let Inst{21} = o1;
+ // Inherit Rm in 20-16
+ let Inst{15} = o0;
+ let Inst{14-10} = Ra;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> fixed-point conversion instructions.
+class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> Scale;
+
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = mode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = Scale;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> integer conversion instructions.
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point immediate instructions.
+class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<8> Imm8;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-13} = Imm8;
+ let Inst{12-10} = 0b100;
+ let Inst{9-5} = imm5;
+ // Inherit Rd in 4-0
+}
+
+// Format for load-register (literal) instructions.
+class A64I_LDRlit<bits<2> opc, bit v,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Imm19;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = Imm19;
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store exclusive instructions.
+class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = size;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+}
+
+class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
+class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+// Format for load-store register (immediate post-indexed) instructions
+class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b01;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
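+
+// E.g. "ldr x0, [x1], #16" uses this format with size=0b11, v=0, opc=0b01 and
+// SImm9=16.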
+
+// Format for load-store register (immediate pre-indexed) instructions
+class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (unprivileged) instructions
+class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store (unscaled immediate) instructions.
+class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+
+// Format for load-store (unsigned immediate) instructions.
+class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<12> UImm12;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = UImm12;
+}
+
+// Format for load-store register (register offset) instructions.
+class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ // Complex operand selection needed for these instructions, so they
+ // need an "addr" field for encoding/decoding to be generated.
+ bits<3> Ext;
+ // OptionHi = Ext{2-1}
+ // S = Ext{0}
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = Ext{2-1};
+ let Inst{13} = optionlo;
+ let Inst{12} = Ext{0};
+ let Inst{11-10} = 0b10;
+ // Inherits Rn in 9-5
+ // Inherits Rt in 4-0
+
+ let AddedComplexity = 50;
+}
+
+// Format for Load-store register pair (offset) instructions
+class A64I_LSPoffset<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (post-indexed) instructions
+class A64I_LSPpostind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (pre-indexed) instructions
+class A64I_LSPpreind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store non-temporal register pair (offset) instructions
+class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Logical (immediate) instructions
+class A64I_logicalimm<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bit N;
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
+ // selection), so we'll combine them into a single field here.
+ bits<13> Imm;
+ // N = Imm{12};
+ // ImmR = Imm{11-6};
+ // ImmS = Imm{5-0};
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = Imm{12};
+ let Inst{21-16} = Imm{11-6};
+ let Inst{15-10} = Imm{5-0};
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Logical (shifted register) instructions
+class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift;
+ let Inst{21} = N;
+ // Rm inherited
+ let Inst{15-10} = Imm6;
+ // Rn inherited
+ // Rd inherited
+}
+
+// Format for Move wide (immediate)
+class A64I_movw<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+ bits<2> Shift; // Called "hw" officially
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = Shift;
+ let Inst{20-5} = UImm16;
+ // Inherits Rd in 4-0
+}
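+
+// E.g. "movz x0, #0x1234, lsl #16" uses this format with sf=1, opc=0b10,
+// Shift=0b01 (hw counts 16-bit steps) and UImm16=0x1234.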
+
+// Format for PC-relative addressing instructions, ADR and ADRP.
+class A64I_PCADR<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<21> Label;
+
+ let Inst{31} = op;
+ let Inst{30-29} = Label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = Label{20-2};
+}
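+
+// Note the split above: the low two bits of Label become immlo (bits 30-29)
+// and the rest immhi (bits 23-5). With op=1 (ADRP) the label is a 4KB page
+// offset, extending the +/-1MB range of ADR to +/-4GB.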
+
+// Format for system instructions
+class A64I_system<bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<2> Op0;
+ bits<3> Op1;
+ bits<4> CRn;
+ bits<4> CRm;
+ bits<3> Op2;
+ bits<5> Rt;
+
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = l;
+ let Inst{20-19} = Op0;
+ let Inst{18-16} = Op1;
+ let Inst{15-12} = CRn;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+ let Inst{4-0} = Rt;
+
+ // These instructions can do horrible things.
+ let hasSideEffects = 1;
+}
+
+// Format for unconditional branch (immediate) instructions
+class A64I_Bimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions: the only operand is the 26-bit branch offset.
+ bits<26> Label;
+
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = Label;
+}
+
+// Format for Test & branch (immediate) instructions
+class A64I_TBimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ // Only the Rt field is shared with other formats; the bit number (split
+ // across the word) and the 14-bit label are this format's own.
+ bits<6> Imm;
+ bits<14> Label;
+
+ let Inst{31} = Imm{5};
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = Imm{4-0};
+ let Inst{18-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for Unconditional branch (register) instructions, including
+// RET. Shares no fields with instructions further up the hierarchy
+// so top-level.
+class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<5> Rn;
+
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = op2;
+ let Inst{15-10} = op3;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = op4;
+}
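+
+// E.g. a plain "ret" (branch to X30) uses this format with opc=0b0010,
+// op2=0b11111, op3=0b000000, op4=0b00000 and Rn=30.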
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 000000000000..cf3a2c3707d9
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,822 @@
+//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <algorithm>
+
+#define GET_INSTRINFO_CTOR
+#include "AArch64GenInstrInfo.inc"
+
+using namespace llvm;
+
+AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(*this, STI), Subtarget(STI) {}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+ unsigned ZeroReg = 0;
+ if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
+ // E.g. ADD xDst, xsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // E.g. ADD wDST, wsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ // E.g. MSR NZCV, xDST
+ BuildMI(MBB, I, DL, get(AArch64::MSRix))
+ .addImm(A64SysReg::NZCV)
+ .addReg(SrcReg);
+ } else if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg));
+ // E.g. MRS xDST, NZCV
+ BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
+ .addImm(A64SysReg::NZCV);
+ } else if (AArch64::GPR64RegClass.contains(DestReg)) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else if (AArch64::GPR32RegClass.contains(DestReg)) {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else if (AArch64::FPR32RegClass.contains(DestReg)) {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR64RegClass.contains(DestReg)) {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR128RegClass.contains(DestReg)) {
+ assert(AArch64::FPR128RegClass.contains(SrcReg));
+
+ // FIXME: there's no good way to do this, at least without NEON:
+ // + There's no single move instruction for q-registers
+ // + We can't create a spill slot and use normal STR/LDR because stack
+ // allocation has already happened
+ // + We can't go via X-registers with FMOV because register allocation has
+ // already happened.
+ // This may not be efficient, but at least it works.
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ } else {
+ llvm_unreachable("Unknown register class in copyPhysReg");
+ }
+
+ // E.g. ORR xDst, xzr, xSrc, lsl #0
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(ZeroReg)
+ .addReg(SrcReg)
+ .addImm(0);
+}
+
+MachineInstr *
+AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Does the Opcode represent a conditional branch that we can remove and re-add
+/// at the end of a basic block?
+static bool isCondBranch(unsigned Opc) {
+ return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
+ Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
+ Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
+ Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
+}
+
+/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
+/// setting TBB to the destination basic block and populating the Cond vector
+/// with data necessary to recreate the conditional branch at a later
+/// date. First element will be the opcode, and subsequent ones define the
+/// conditions being branched on in an instruction-specific manner.
+static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ switch(I->getOpcode()) {
+ case AArch64::Bcc:
+ case AArch64::CBZw:
+ case AArch64::CBZx:
+ case AArch64::CBNZw:
+ case AArch64::CBNZx:
+ // These instructions just have one predicate operand in position 0 (either
+ // a condition code or a register being compared).
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ TBB = I->getOperand(1).getMBB();
+ return;
+ case AArch64::TBZwii:
+ case AArch64::TBZxii:
+ case AArch64::TBNZwii:
+ case AArch64::TBNZxii:
+ // These have two predicate operands: a register and a bit position.
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(I->getOperand(1));
+ TBB = I->getOperand(2).getMBB();
+ return;
+ default:
+ llvm_unreachable("Unknown conditional branch to classify");
+ }
+}
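+
+// E.g. "TBZ w3, #5, <bb>" is classified as Cond = [TBZwii, w3, 5] with TBB set
+// to <bb>; InsertBranch below can replay that Cond vector verbatim.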
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == AArch64::Bimm) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranch(LastOpc)) {
+ classifyCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && LastOpc == AArch64::Bimm) {
+ while (SecondLastOpc == AArch64::Bimm) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (LastOpc == AArch64::Bimm) {
+ if (SecondLastOpc == AArch64::Bcc) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(SecondLastOpc)) {
+ classifyCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+bool AArch64InstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ switch (Cond[0].getImm()) {
+ case AArch64::Bcc: {
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
+ CC = A64InvertCondCode(CC);
+ Cond[1].setImm(CC);
+ return false;
+ }
+ case AArch64::CBZw:
+ Cond[0].setImm(AArch64::CBNZw);
+ return false;
+ case AArch64::CBZx:
+ Cond[0].setImm(AArch64::CBNZx);
+ return false;
+ case AArch64::CBNZw:
+ Cond[0].setImm(AArch64::CBZw);
+ return false;
+ case AArch64::CBNZx:
+ Cond[0].setImm(AArch64::CBZx);
+ return false;
+ case AArch64::TBZwii:
+ Cond[0].setImm(AArch64::TBNZwii);
+ return false;
+ case AArch64::TBZxii:
+ Cond[0].setImm(AArch64::TBNZxii);
+ return false;
+ case AArch64::TBNZwii:
+ Cond[0].setImm(AArch64::TBZwii);
+ return false;
+ case AArch64::TBNZxii:
+ Cond[0].setImm(AArch64::TBZxii);
+ return false;
+ default:
+ llvm_unreachable("Unknown branch type");
+ }
+}
+
+
+unsigned
+AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ if (FBB == 0 && Cond.empty()) {
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
+ return 1;
+ } else if (FBB == 0) {
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+ return 1;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
+ return 2;
+}
+
+unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::TLSDESC_BLRx: {
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+ .addOperand(MI.getOperand(1));
+ MI.setDesc(get(AArch64::BLRx));
+
+ llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+ return true;
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned StoreOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: StoreOp = AArch64::LS32_STR; break;
+ case 8: StoreOp = AArch64::LS64_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: StoreOp = AArch64::LSFP32_STR; break;
+ case 8: StoreOp = AArch64::LSFP64_STR; break;
+ case 16: StoreOp = AArch64::LSFP128_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ NewMI.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned LoadOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+    switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LS32_LDR; break;
+ case 8: LoadOp = AArch64::LS64_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+ || RC->hasType(MVT::f128))
+           && "Expected integer or floating type for load");
+ switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LSFP32_LDR; break;
+ case 8: LoadOp = AArch64::LSFP64_LDR; break;
+ case 16: LoadOp = AArch64::LSFP128_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 16) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+ // is the largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ Limit = std::min(Limit, 0xfffu);
+ break;
+ }
+
+ int AccessScale, MinOffset, MaxOffset;
+ getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+ Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+ int &AccessScale, int &MinOffset,
+ int &MaxOffset) const {
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unkown load/store kind");
+ case TargetOpcode::DBG_VALUE:
+ AccessScale = 1;
+ MinOffset = INT_MIN;
+ MaxOffset = INT_MAX;
+ return;
+ case AArch64::LS8_LDR: case AArch64::LS8_STR:
+ case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+ case AArch64::LDRSBw:
+ case AArch64::LDRSBx:
+ AccessScale = 1;
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ return;
+ case AArch64::LS16_LDR: case AArch64::LS16_STR:
+ case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+ case AArch64::LDRSHw:
+ case AArch64::LDRSHx:
+ AccessScale = 2;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS32_LDR: case AArch64::LS32_STR:
+ case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+ case AArch64::LDRSWx:
+ case AArch64::LDPSWx:
+ AccessScale = 4;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS64_LDR: case AArch64::LS64_STR:
+ case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+ case AArch64::PRFM:
+ AccessScale = 8;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+ case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+ AccessScale = 4;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+ case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+ AccessScale = 8;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+ AccessScale = 16;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ }
+}
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ const MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+ if (MI.isLabel())
+ return 0;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case AArch64::TLSDESCCALL:
+ return 0;
+ default:
+ llvm_unreachable("Unknown instruction class");
+ }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
+bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MFI.getObjectOffset(FrameRegIdx);
+ llvm_unreachable("Unimplemented rewriteFrameIndex");
+}
+
+void llvm::emitRegUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
+ if (NumBytes == 0 && DstReg == SrcReg)
+ return;
+ else if (abs64(NumBytes) & ~0xffffff) {
+ // Generically, we have to materialize the offset into a temporary register
+ // and subtract it. There are a couple of ways this could be done, for now
+ // we'll use a movz/movk or movn/movk sequence.
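+    // For example (illustrative only): NumBytes == 0x12345678 would emit
+    //   movz xScratch, #0x5678            // bits [15:0]
+    //   movk xScratch, #0x1234, lsl #16   // bits [31:16]
+    // (zero halfwords are skipped) before the final ADD/SUB below.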
+ uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
+ .addImm(0xffff & Bits).addImm(0)
+ .setMIFlags(MIFlags);
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(1)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(2)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(3)
+ .setMIFlags(MIFlags);
+ }
+
+  // ADD/SUB DST, SRC, xTMP (, lsl #0)
+ unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
+ BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0)
+ .setMIFlag(MIFlags);
+ return;
+ }
+
+ // Now we know that the adjustment can be done in at most two add/sub
+ // (immediate) instructions, which is always more efficient than a
+  // literal-pool load, or even a hypothetical movz/movk/add sequence.
+
+ // Decide whether we're doing addition or subtraction
+ unsigned LowOp, HighOp;
+ if (NumBytes >= 0) {
+ LowOp = AArch64::ADDxxi_lsl0_s;
+ HighOp = AArch64::ADDxxi_lsl12_s;
+ } else {
+ LowOp = AArch64::SUBxxi_lsl0_s;
+ HighOp = AArch64::SUBxxi_lsl12_s;
+ NumBytes = abs64(NumBytes);
+ }
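+
+  // For example (illustrative): NumBytes == 0x3004 emits "add xDst, xSrc, #4"
+  // followed by "add xDst, xDst, #3, lsl #12", while NumBytes == 0x3000 emits
+  // just the high-part add.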
+
+ // If we're here, at the very least a move needs to be produced, which just
+ // happens to be materializable by an ADD.
+ if ((NumBytes & 0xfff) || NumBytes == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes & 0xfff)
+ .setMIFlag(MIFlags);
+
+ // Next update should use the register we've just defined.
+ SrcReg = DstReg;
+ }
+
+ if (NumBytes & 0xfff000) {
+ BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes >> 12)
+ .setMIFlag(MIFlags);
+ }
+}
+
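+// Convenience wrapper for adjusting the stack pointer. For example
+// (an illustrative sketch, not a call site in this patch), frame lowering
+// might use
+//   emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -16,
+//                MachineInstr::FrameSetup);
+// to claim 16 bytes of stack, with X16 available as the scratch register.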
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags) {
+  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, ScratchReg,
+ NumBytes, MIFlags);
+}
+
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ AArch64MachineFunctionInfo* MFI
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+        // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case AArch64::TLSDESC_BLRx:
+ // Make sure it's a local dynamic access.
+ if (!I->getOperand(1).isSymbol() ||
+ strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ break;
+
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+ // code sequence assumes the address will be.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+    // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ // Insert a copy from X0 to TLSBaseAddrReg for later.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 000000000000..22a2ab4cf60a
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,112 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+ const AArch64RegisterInfo RI;
+ const AArch64Subtarget &Subtarget;
+public:
+ explicit AArch64InstrInfo(const AArch64Subtarget &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ /// Look through the instructions in this function and work out the largest
+ /// the stack frame can be while maintaining the ability to address local
+ /// slots with no complexities.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+ /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+ /// immediate offset, this function determines the constraints required for
+ /// the immediate. It must satisfy:
+ /// + MinOffset <= imm <= MaxOffset
+  ///   + imm % AccessScale == 0
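+  /// For example (per the table in AArch64InstrInfo.cpp), a 32-bit LDR or STR
+  /// has AccessScale == 4, so its immediate must be a multiple of 4 in the
+  /// range [0, 0xfff * 4].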
+ void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+ int &MinOffset, int &MaxOffset) const;
+
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+  unsigned getInstBundleLength(const MachineInstr &MI) const;
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII);
+
+
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 000000000000..37be5e4892e4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5099 @@
+//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 scalar instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Target-specific ISD nodes and profiles
+//===----------------------------------------------------------------------===//
+
+def SDT_A64ret : SDTypeProfile<0, 0, []>;
+def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
+ SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// (ins NZCV, Condition, Dest)
+def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
+def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
+
+// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
+def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>]>;
+def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
+
+// (outs NZCV), (ins LHS, RHS, Condition)
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>]>;
+def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
+
+
+// (outs GPR64), (ins)
+def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+// A64 compares don't really care about the condition (they set all the flags),
+// so a simple binary operator is useful.
+def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, node:$rhs, cond)>;
+
+
+// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
+// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
+// and V flags can be set differently by this operation. It comes down to
+// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
+// then everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+
+// So, finally, the only LLVM-native comparisons that don't mention C and V are
+// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
+// absence of information about op2.
+def equality_cond : PatLeaf<(cond), [{
+ return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
+}]>;
+
+def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
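+
+// For example (illustrative): an i64 "(A64setcc $Rn, (sub 0, $Rm), SETEQ)" can
+// safely be selected as "cmn $Rn, $Rm"; equality only inspects Z, so the
+// possible C/V mismatch when $Rm == 0 does not matter.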
+
+// There are two layers of indirection here, driven by the following
+// considerations.
+// + TableGen does not know CodeModel or Reloc so that decision should be
+// made for a variable/address at ISelLowering.
+// + The output of ISelLowering should be selectable (hence the Wrapper,
+// rather than a bare target opcode)
+def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+
+
+def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
+ [SDNPHasChain]>;
+
+
+// (A64BFI LHS, RHS, LSB, Width)
+def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>,
+ SDTCisVT<4, i64>]>;
+
+def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
+
+// (A64EXTR HiReg, LoReg, LSB)
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
+def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+// (A64[SU]BFX Field, ImmR, ImmS).
+//
+// Note that ImmR and ImmS are already encoded for the actual instructions. The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
+// destination but needs a relocation against a fixed symbol. As such it has two
+// certain operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instruction (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed to
+// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
+let Defs = [XSP], Uses = [XSP] in {
+ def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+
+ def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic operation pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+multiclass AtomicSizes<string opname> {
+ def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
+ def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
+ def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
+ def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
+ [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
+defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
+defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
+defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
+defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
+defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
+defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+ // These operations need a CMP to calculate the correct value
+ defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+ defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+def ATOMIC_CMP_SWAP_I8
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I16
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I32
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I64
+ : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
+ [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (extended register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
+
+// The RHS of these operations is conceptually a sign/zero-extended
+// register, optionally shifted left by 1-4. The extension can be a
+// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
+// must be specified with one exception:
+
+// If one of the registers is sp/wsp then LSL is an alias for UXTW in
+// 32-bit instructions and UXTX in 64-bit versions, the shift amount
+// is not optional in that case (but can explicitly be 0), and the
+// entire suffix can be skipped (e.g. "add sp, x3, x2").
+
+multiclass extend_operands<string PREFIX, string Diag> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX;
+ let RenderMethod = "addRegExtendOperands";
+ let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
+ let DiagnosticType = "AddSubRegExtend" # Diag;
+ }
+
+ def _operand : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
+ let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
+ let DecoderMethod = "DecodeRegExtendOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
+ }
+}
+
+defm UXTB : extend_operands<"UXTB", "Small">;
+defm UXTH : extend_operands<"UXTH", "Small">;
+defm UXTW : extend_operands<"UXTW", "Small">;
+defm UXTX : extend_operands<"UXTX", "Large">;
+defm SXTB : extend_operands<"SXTB", "Small">;
+defm SXTH : extend_operands<"SXTH", "Small">;
+defm SXTW : extend_operands<"SXTW", "Small">;
+defm SXTX : extend_operands<"SXTX", "Large">;
+
+def LSL_extasmoperand : AsmOperandClass {
+ let Name = "RegExtendLSL";
+ let RenderMethod = "addRegExtendOperands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+def LSL_extoperand : Operand<i64> {
+ let ParserMatchClass = LSL_extasmoperand;
+}
+
+
+// The patterns for various sign-extensions are a little ugly and
+// non-uniform because everything has already been promoted to the
+// legal i64 and i32 types. We'll wrap the various variants up in a
+// class for use later.
+class extend_types {
+ dag uxtb; dag uxth; dag uxtw; dag uxtx;
+ dag sxtb; dag sxth; dag sxtw; dag sxtx;
+ ValueType ty;
+ RegisterClass GPR;
+}
+
+def extends_to_i64 : extend_types {
+ let uxtb = (and (anyext i32:$Rm), 255);
+ let uxth = (and (anyext i32:$Rm), 65535);
+ let uxtw = (zext i32:$Rm);
+ let uxtx = (i64 $Rm);
+
+ let sxtb = (sext_inreg (anyext i32:$Rm), i8);
+ let sxth = (sext_inreg (anyext i32:$Rm), i16);
+ let sxtw = (sext i32:$Rm);
+ let sxtx = (i64 $Rm);
+
+ let ty = i64;
+ let GPR = GPR64xsp;
+}
+
+
+def extends_to_i32 : extend_types {
+ let uxtb = (and i32:$Rm, 255);
+ let uxth = (and i32:$Rm, 65535);
+ let uxtw = (i32 i32:$Rm);
+ let uxtx = (i32 i32:$Rm);
+
+ let sxtb = (sext_inreg i32:$Rm, i8);
+ let sxth = (sext_inreg i32:$Rm, i16);
+ let sxtw = (i32 i32:$Rm);
+ let sxtx = (i32 i32:$Rm);
+
+ let ty = i32;
+ let GPR = GPR32wsp;
+}
+
+// Now, six of the extensions supported are easy and uniform: if the source size
+// is 32 bits or less, then Rm is always a 32-bit register. We'll instantiate
+// those instructions in one block.
+
+// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
+// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
+// be impossible.
+// + Patterns are very different as well.
+// + Passing different registers would be ugly (more fields in extend_types
+// would probably be the best option).
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
+ SDPatternOperator opfrag,
+ dag outs, extend_types exts> {
+ def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+ NoItinerary>;
+
+ def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+ NoItinerary>;
+}
+
+// These two could be merged in with the above, but their patterns aren't really
+// necessary and the naming scheme would necessarily break:
+multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
+ dag outs> {
+ def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
+ NoItinerary>;
+
+ def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: same as uxtx */],
+ NoItinerary>;
+}
+
+multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
+ def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No pattern: probably same as uxtw */],
+ NoItinerary>;
+
+ def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: probably same as uxtw */],
+ NoItinerary>;
+}
+
+class SetRD<RegisterClass RC, SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
+class SetNZCV<SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
+
+defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd)>;
+defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd)>;
+defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+
+let Defs = [NZCV] in {
+defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd)>;
+defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
+ (outs GPR32:$Rd)>;
+defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd)>;
+defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
+ (outs GPR32:$Rd)>;
+
+
+let Rd = 0b11111, isCompare = 1 in {
+defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
+defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
+defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
+defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
+}
+}
+
+// Now patterns for the operation without a shift being needed. No patterns are
+// created for uxtx/sxtx since they're non-uniform and it's expected that
+// add/sub (shifted register) will handle those cases anyway.
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
+ extend_types exts> {
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
+ (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
+ (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
+ (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
+
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
+ (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
+ (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
+ (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
+
+// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
+// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
+// operation. Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have a
+// dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. The official assembly format specifies a 12-bit immediate
+// with one of "", "LSL #0" or "LSL #12" as a supplementary operand.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated to
+// a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); assembly parsing is rather complex and
+// error-prone C++ code.
+//
+// By splitting the immediate, we can delegate handling this optional operand to
+// an InstAlias. Supporting functions to generate the correct MCInst are still
+// required, but these are essentially trivial and parsing can remain generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first-class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
+
+let ParserMethod = "ParseImmWithLSLOperand",
+ RenderMethod = "addImmWithLSLOperands" in {
+ // Derived PredicateMethod fields are different for each
+ def addsubimm_lsl0_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL0";
+    // If an error is reported against this operand, the instruction could
+    // also be a register variant.
+ let DiagnosticType = "AddSubSecondSource";
+ }
+
+ def addsubimm_lsl12_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL12";
+ let DiagnosticType = "AddSubSecondSource";
+ }
+}
+
+def shr_12_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+def neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
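+
+// For example (illustrative): "(add $Rn, 0x123000)" is matched by the lsl12
+// operand below, whose shr_12_XFORM encodes imm12 = 0x123, while
+// "(add $Rn, -5)" is matched through the SUB flavour via neg_XFORM and ends
+// up as a "sub ..., #5".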
+
+
+multiclass addsub_imm_operands<ValueType ty> {
+ let PrintMethod = "printAddSubImmLSL0Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl0_asmoperand in {
+ def _posimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+ def _negimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+ neg_XFORM>;
+ }
+
+ let PrintMethod = "printAddSubImmLSL12Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl12_asmoperand in {
+ def _posimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+ shr_12_XFORM>;
+
+ def _negimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+ shr_12_neg_XFORM>;
+ }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
+
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+ string asmop, string cmpasmop,
+ Operand imm_operand, Operand cmp_imm_operand,
+ RegisterClass GPR, RegisterClass GPRsp,
+ AArch64Reg ZR, ValueType Ty> {
+ // All registers for non-S variants allow SP
+ def _s : A64I_addsubimm<sf, op, 0b0, shift,
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary>;
+
+
+ // S variants can read SP but would write to ZR
+ def _S : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs GPR:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ // Note that the pattern here for ADDS is subtle. Canonically CMP
+ // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
+ // ADDS a, (-b). This is not true in general.
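+  // For example (illustrative): in the "add"/"cmn" instantiations,
+  // cmp_imm_operand is the negated-immediate operand, so "(A64cmp $Rn, -7)"
+  // matches here and is emitted as "cmn $Rn, #7".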
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(cmpasmop, " $Rn, $Imm12"),
+ [(set NZCV,
+ (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
+ NoItinerary> {
+ let Rd = 0b11111;
+ let Defs = [NZCV];
+ let isCompare = 1;
+ }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+ string asmop, string cmpasmop, string operand, string cmpoperand,
+ RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
+ ValueType Ty> {
+ defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl0"),
+ !cast<Operand>(cmpoperand # "_lsl0"),
+ GPR, GPRsp, ZR, Ty>;
+
+ defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl12"),
+ !cast<Operand>(cmpoperand # "_lsl12"),
+ GPR, GPRsp, ZR, Ty>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+ "addsubimm_operand_i32_posimm",
+ "addsubimm_operand_i32_negimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+ "addsubimm_operand_i64_posimm",
+ "addsubimm_operand_i64_negimm",
+ GPR64, GPR64xsp, XZR, i64>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+ "addsubimm_operand_i32_negimm",
+ "addsubimm_operand_i32_posimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+ "addsubimm_operand_i64_negimm",
+ "addsubimm_operand_i64_posimm",
+ GPR64, GPR64xsp, XZR, i64>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
+ def _fromsp : InstAlias<"mov $Rd, $Rn",
+ (addop GPRsp:$Rd, SP:$Rn, 0),
+ 0b1>;
+
+ def _tosp : InstAlias<"mov $Rd, $Rn",
+ (addop SP:$Rd, GPRsp:$Rn, 0),
+ 0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifed register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form> {
+ def _asmoperand_i32 : AsmOperandClass {
+ let Name = "Shift" # form # "i32";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", false>";
+ let DiagnosticType = "AddSubRegShift32";
+ }
+
+ // Note that the operand type is intentionally i64 because the DAGCombiner
+ // puts these into a canonical form.
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ let DecoderMethod = "Decode32BitShiftOperand";
+ }
+
+ def _asmoperand_i64 : AsmOperandClass {
+ let Name = "Shift" # form # "i64";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", true>";
+ let DiagnosticType = "AddSubRegShift64";
+ }
+
+ def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, s, 0b10,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
+ ValueType ty, Operand shift_operand, SDNode shiftop> {
+ def : InstAlias<"neg $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+ def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
+ (INST ZR, $Rm, shift_operand:$Imm6)>;
+}
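+
+// For example, "neg w0, w1, lsl #3" is accepted via the alias above as
+// "sub w0, wzr, w1, lsl #3".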
+
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+ : InstAlias<"negs $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR> {
+ let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
+ let Uses = [NZCV] in {
+ def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+
+ def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+ }
+}
+
+let isCommutable = 1 in {
+ defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in {
+ let isCommutable = 1 in {
+ defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+ }
+
+ defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
+def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
+def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
+def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
+def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+// UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing etc because the MCInsts
+// have a shape that's closer to their use in code.
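+//
+// For example, the canonical alias "sbfx x0, x1, #7, #10" corresponds to the
+// architectural "sbfm x0, x1, #7, #16" (for [SU]BFX, ImmR is the lsb and ImmS
+// is lsb + width - 1).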
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+def uimm5_asmoperand : AsmOperandClass {
+ let Name = "UImm5";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm5";
+}
+
+def uimm6_asmoperand : AsmOperandClass {
+ let Name = "UImm6";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm6";
+}
+
+def bitfield32_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+
+ let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+def bitfield64_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+
+ // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+def BFMwwii :
+ A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+def BFMxxii :
+ A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions with a 64-bit destination cannot be handled by
+// an instruction alias: their syntax is (for example) "SXTB x0, w0", which
+// needs to be mapped to "SBFM x0, x0, #0, #7" (changing the class of Rn).
+// InstAlias is not capable of such a mapping as far as I'm aware.
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
+ string asmop, bits<6> imms, dag pattern>
+ : A64I_bitfield<sf, opc, sf,
+ (outs GPRDest:$Rd), (ins GPR32:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [(set dty:$Rd, pattern)], NoItinerary> {
+ let ImmR = 0b000000;
+ let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
+ (sext_inreg (anyext i32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
+ (sext_inreg i32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
+ (sext_inreg (anyext i32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
+ (sext_inreg i32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
+ (and i32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
+ (and i32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in {
+ def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
+ (and (anyext i32:$Rn), 255)>;
+ def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
+ (and (anyext i32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64 bits
+// too. There's no architectural difference here, it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg i64:$Rn, i8),
+ (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i16),
+ (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i32),
+ (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_bitfield<0b0, opc, 0b0,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 31;
+ }
+
+ def xxi : A64I_bitfield<0b1, opc, 0b1,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 63;
+ }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say that certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice these operands store the shift rather than
+// the machine bits in an MCInst. The complexity overhead of consistency
+// outweighed the benefits in this case (custom asmparser, printer and selection
+// vs custom encoder).
+def bitfield32_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+ let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+ let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
+ Operand operand>
+ : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+ "lsl\t$Rd, $Rn, $FullImm",
+ [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
+ NoItinerary> {
+ bits<12> FullImm;
+ let ImmR = FullImm{5-0};
+ let ImmS = FullImm{11-6};
+
+ // No disassembler allowed because it would overlap with BFM which does the
+ // actual work.
+ let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
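+
+// Worked example of the encoding (illustrative): "lsl x0, x1, #3" is really
+// "ubfm x0, x1, #61, #60", i.e. ImmR = -shift mod 64 and ImmS = 63 - shift;
+// the custom encoder methods above derive these fields from the stored shift.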
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass {
+ let Name = "BFX32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass {
+ let Name = "BFX64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfx64_width : Operand<i64> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
+def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
+ (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
+// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
+def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+ sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+def bfi32_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI32LSB";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addBFILSBOperands<32>";
+ let DiagnosticType = "UImm5";
+}
+
+def bfi32_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let PrintMethod = "printBFILSBOperand<32>";
+ let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI64LSB";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addBFILSBOperands<64>";
+ let DiagnosticType = "UImm6";
+}
+
+def bfi64_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let PrintMethod = "printBFILSBOperand<64>";
+ let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion so the width operand can
+// be shared between 32/64-bit cases. A custom print method is still needed
+// though, because ImmS encodes "width - 1".
+def bfi32_width_asmoperand : AsmOperandClass {
+ let Name = "BFI32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfi32_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass {
+ let Name = "BFI64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfi64_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi64_width_asmoperand;
+}
+
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
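+
+// Worked example (illustrative): "bfi w0, w1, #8, #4" corresponds to
+// "bfm w0, w1, #24, #3", since the BFI lsb is encoded as -lsb mod 32 in ImmR
+// and the width as width - 1 in ImmS.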
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass {
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+ let DiagnosticType = "Label";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT> {
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
+ return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+ return Imm == A64CC::NE;
+}]> >;
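+
+// For example (illustrative), a compare-against-zero branch such as
+//   cmp x0, #0
+//   b.eq target
+// can be selected as the single instruction
+//   cbz x0, target
+// which is what the A64br_cc/A64cmp patterns above arrange.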
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary> {
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass {
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm4";
+}
+
+def uimm4 : Operand<i32> {
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32> {
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The other gets parsed implicitly as part
+// of the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "CondCodeOp";
+ let RenderMethod = "addCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code_op : Operand<i32> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+ (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
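+
+// Illustrative use: a short-circuit condition such as "a == 0 && b == 7" can
+// be evaluated without branches:
+//   cmp  w0, #0
+//   ccmp w1, #7, #0, eq   // if eq: compare w1 with 7; else NZCV := 0b0000
+//   b.eq both_true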
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "InvCondCodeOp";
+ let RenderMethod = "addInvCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def inv_cond_code_op : Operand<i32> {
+ let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+def inv_cond_code
+ : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+ SDPatternOperator select> {
+ let Uses = [NZCV] in {
+ def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
+ NoItinerary>;
+
+
+ def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+ }
+}
+
+def simple_select
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+class complex_select<SDPatternOperator opnode>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+ complex_select<PatFrag<(ops node:$val),
+ (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
+// Now the instruction aliases, which fit nicely into LLVM's model:
+
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
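+
+// Example of the inverted condition at work (illustrative): "cset w0, eq"
+// becomes "csinc w0, wzr, wzr, ne", i.e. w0 = ne ? wzr : wzr + 1, which is 1
+// exactly when eq holds; hence inv_cond_code_op in the aliases above.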
+
+// Finally some helper patterns.
+
+// For CSET (a.k.a. zero-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// For CSETM (a.k.a. sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
+ (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
+ (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf,
+ 0,
+ 0b00000,
+ opcode,
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ (outs GPRrc:$Rd),
+ (ins GPRrc:$Rn),
+ patterns,
+ itin>;
+
+multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
+ let hasSideEffects = 0 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
+def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
+
+def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
+def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
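+
+// There is no count-trailing-zeros instruction, so the cttz patterns above
+// synthesise one (illustrative):
+//   rbit w0, w1    // reverse the bits...
+//   clz  w0, w0    // ...so trailing zeros become leading zeros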
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set i32:$Rd, (bswap i32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set i64:$Rd, (bswap i64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
+// LSR, ASR, ROR
+
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf,
+ opcode,
+ 0,
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, GPRsp:$Rm),
+ patterns,
+ itin>;
+
+multiclass dp_2src_crc<bit c, string asmop> {
+ def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
+ !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
+ def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
+ !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
+ def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
+ !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
+ def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
+ !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
+ NoItinerary>;
+}
+
+multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd,
+ (op i32:$Rn, (i64 (zext i32:$Rm))))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+
+multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+// Here we define the data processing 2 source instructions.
+defm CRC32 : dp_2src_crc<0b0, "crc32">;
+defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl i32:$Rn, i64:$Rm),
+ (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(srl i32:$Rn, i64:$Rm),
+ (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(sra i32:$Rn, i64:$Rm),
+ (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(rotr i32:$Rn, i64:$Rm),
+ (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
+def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+ ValueType AccTy, RegisterClass SrcReg,
+ string asmop, dag pattern>
+ : A64I_dp3<sf, opcode,
+ (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+ [(set AccTy:$Rd, pattern)], NoItinerary> {
+ RegisterClass AccGPR = AccReg;
+ RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
+ (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
+ (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
+ (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
+ (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
+ (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
+ (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
+ (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
+ (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
+ def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "umulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+
+ def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "smulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
+ Register ZR, dag pattern> {
+ def : InstAlias<asmop # " $Rd, $Rn, $Rm",
+ (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+
+ def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
+}
+
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
+
+defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
+ (sub 0, (mul i32:$Rn, i32:$Rm))>;
+defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
+ (sub 0, (mul i64:$Rn, i64:$Rm))>;
+
+defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
+ (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
+defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
+ (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
+defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
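+
+// For example, the "mul" alias above ties the accumulator to the zero
+// register: "mul w0, w1, w2" is "madd w0, w1, w2, wzr", and the accompanying
+// Pat gives instruction selection the same expansion.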
+
+
+//===----------------------------------------------------------------------===//
+// Exception generation
+//===----------------------------------------------------------------------===//
+// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+
+def uimm16_asmoperand : AsmOperandClass {
+ let Name = "UImm16";
+ let PredicateMethod = "isUImm<16>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm16";
+}
+
+def uimm16 : Operand<i32> {
+ let ParserMatchClass = uimm16_asmoperand;
+}
+
+class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
+ : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
+ !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
+def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
+def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
+def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
+def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+
+def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
+def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
+def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+
+// The immediate is optional for the DCPS instructions, defaulting to 0.
+def : InstAlias<"dcps1", (DCPS1i 0)>;
+def : InstAlias<"dcps2", (DCPS2i 0)>;
+def : InstAlias<"dcps3", (DCPS3i 0)>;
+
+//===----------------------------------------------------------------------===//
+// Extract (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: EXTR + alias ROR
+
+def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i32:$Rd,
+ (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
+ NoItinerary>;
+def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i64:$Rd,
+ (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
+ NoItinerary>;
+
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
+
+def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
+ (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
+ (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
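+
+// Worked example (illustrative): "ror w0, w1, #8" is "extr w0, w1, w1, #8";
+// extracting 32 bits starting at bit 8 of the doubled value w1:w1 is exactly
+// a rotate right by 8.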
+
+//===----------------------------------------------------------------------===//
+// Floating-point compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCMP, FCMPE
+
+def fpzero_asmoperand : AsmOperandClass {
+ let Name = "FPZero";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPZero";
+}
+
+def fpz32 : Operand<f32>,
+ ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+def fpz64 : Operand<f64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
+ def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
+ let Defs = [NZCV];
+ }
+}
+
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
+
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
+
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCCMP, FCCMPE
+
+class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
+ : A64I_fpccmp<0b0, 0b0, type, op,
+ (outs),
+ (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in {
+ def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f32:$Rd,
+ (simple_select f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+
+ def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f64:$Rd,
+ (simple_select f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+ [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+ SDPatternOperator opnode = FPNoUnop> {
+ def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f32:$Rd, (opnode f32:$Rn))],
+ NoItinerary>;
+
+ def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f64:$Rd, (opnode f64:$Rn))],
+ NoItinerary>;
+}
+
+defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instructions have different source and destination register types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
+ RegisterClass Class = rc;
+ ValueType VT = vt;
+ bit t1 = fld{1};
+ bit t0 = fld{0};
+}
+
+def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
+def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
+def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
+
+class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
+ : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
+ {0,0,0,1, DestReg.t1, DestReg.t0},
+ (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
+ "fcvt\t$Rd, $Rn",
+ [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
+
+def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
+def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
+def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
+def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
+def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
+def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
+
+def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>;
+
+multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
+ SDPatternOperator opnode> {
+ def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
+ (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+ def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+let isCommutable = 1 in {
+ defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+ defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
+
+ // No patterns for these.
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+ defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+ defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+ defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+ defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+ bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+ : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+ (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+ !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+ [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
+ NoItinerary>;
+
+def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>".
+def fixedpos_asmoperand_i32 : AsmOperandClass {
+ let Name = "CVTFixedPos32";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<32>";
+ let DiagnosticType = "CVTFixedPos32";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
+def fixedpos_asmoperand_i64 : AsmOperandClass {
+ let Name = "CVTFixedPos64";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<64>";
+ let DiagnosticType = "CVTFixedPos64";
+}
+
+// We need the Cartesian product of f32/f64 and i32/i64 operands for
+// conversions:
+// + Selection needs to use operands of correct floating type
+// + Assembly parsing and decoding depend on integer width
+class cvtfix_i32_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i32;
+ let DecoderMethod = "DecodeCVT32FixedPosOperand";
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+class cvtfix_i64_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i64;
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+// Because of the proliferation of weird operands, it's not really
+// worth going for a multiclass here. Oh well.
+
+class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass GPR, RegisterClass FPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
+ (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
+ NoItinerary>;
+
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
+
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
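+
+// Illustrative use: converting f32 to Q15.16 fixed point, i.e.
+// (i32)(x * 65536.0f), matches the fp_to_sint(fmul ...) pattern above and
+// selects to a single instruction:
+//   fcvtzs w0, s0, #16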
+
+
+class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass FPR, RegisterClass GPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
+ (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
+ NoItinerary>;
+
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> integer conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass DestPR, RegisterClass SrcPR, string asmop>
+ : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+
+multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
+ def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
+ GPR32, FPR32, asmop # "s">;
+ def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
+ GPR64, FPR32, asmop # "s">;
+ def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
+ GPR32, FPR32, asmop # "u">;
+ def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
+ GPR64, FPR32, asmop # "u">;
+
+ def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
+ GPR32, FPR64, asmop # "s">;
+ def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
+ GPR64, FPR64, asmop # "s">;
+ def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
+ GPR32, FPR64, asmop # "u">;
+ def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
+ GPR64, FPR64, asmop # "u">;
+}
+
+defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
+defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
+defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
+defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
+defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+
+def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
+def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
+def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
+def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
+def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
+def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
+def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
+def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
+
+multiclass A64I_inttofp<bit o0, string asmop> {
+ def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
+ def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
+ def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
+ def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
+}
+
+defm S : A64I_inttofp<0b0, "scvtf">;
+defm U : A64I_inttofp<0b1, "ucvtf">;
+
+def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
+def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
+def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
+def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
+def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
+def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
+def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
+def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
+
+def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
+def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
+def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
+def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+
+def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
+def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
+def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
+def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
+
+def lane1_asmoperand : AsmOperandClass {
+ let Name = "Lane1";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "Lane1";
+}
+
+def lane1 : Operand<i32> {
+ let ParserMatchClass = lane1_asmoperand;
+ let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in {
+ def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+ (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+ "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+ def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+ (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+ "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+ (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+ (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass {
+ let Name = "FMOVImm";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPImm";
+}
+
+// The MCOperand for these instructions is the encoded 8-bit value.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+ uint32_t Imm8;
+ A64Imms::isFPImm(N->getValueAPF(), Imm8);
+ return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
+
+class fmov_operand<ValueType FT>
+ : Operand<i32>,
+ PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+ SDXF_fpimm> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+ Operand fmov_operand>
+ : A64I_fpimm<0b0, 0b0, type, 0b00000,
+ (outs Reg:$Rd),
+ (ins fmov_operand:$Imm8),
+ "fmov\t$Rd, $Imm8",
+ [(set VT:$Rd, fmov_operand:$Imm8)],
+ NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+ let Name = "LoadLitLabel";
+ let RenderMethod = "addLabelOperands<19, 4>";
+ let DiagnosticType = "Label";
+}
+
+def ldrlit_label : Operand<i64> {
+ let EncoderMethod = "getLoadLitLabelOpValue";
+
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let ParserMatchClass = ldrlit_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Various instructions take an immediate value (a raw number can always be
+// used), but some values also have a symbolic name to make things easier.
+// These operands and the associated functions abstract away the differences.
+multiclass namedimm<string prefix, string mapper> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ let DiagnosticType = "NamedImm_" # prefix;
+ }
+
+ def _op : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in {
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+
+let mayLoad = 1 in {
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
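+//
+// For example (illustrative): LDXR uses neither Rs nor Rt2, so its class
+// below uses fixLoadStoreExclusive<0,0> and both fields are forced to
+// 0b11111 in the emitted encoding; STXR has an Rs but no Rt2, hence
+// fixLoadStoreExclusive<1,0>.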
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass {
+ let Name = "GPR64xsp0";
+ let PredicateMethod = "isWrappedReg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "ParseLSXAddressOperand";
+ // Diagnostics are provided by ParserMethod
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp> {
+ let ParserMatchClass = GPR64xsp0_asmoperand;
+}
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                               [], NoItinerary>;
+
+ def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayLoad = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+ def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXR : A64I_LRex<"ldxr", 0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR : A64I_LRex<"ldar", 0b101>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
+def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
+def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
+def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+ (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
+ NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stt2n <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+}
+
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tt2n <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+                    pat, itin> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt, GPR32:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDUR, LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH
+// and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+// would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
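+// For example "ldrb w0, [x1, #8]" stores 8 in the immediate field, whereas
+// "ldr x0, [x1, #8]" (as above) stores 1.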
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix> {
+ def uimm12_asmoperand : AsmOperandClass {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreUImm12_" # MemSize;
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
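+// For example an offset of -1 is stored as 0x1ff and -256 as 0x100, since
+// (-1 & 0x1ff) == 0x1ff and (-256 & 0x1ff) == 0x100; non-negative offsets
+// such as 255 are stored unchanged.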
+
+def simm9_asmoperand : AsmOperandClass {
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+ let DiagnosticType = "LoadStoreSImm9";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9> {
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+// + <amount> is a shift: #<log(transfer size)> or #0
+// + <R> can be W or X.
+// + If <R> is W, <extend> can be UXTW or SXTW
+// + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
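+//
+// For example, all of the following are valid for a 4-byte access:
+//   ldr w0, [x1, w2, sxtw #2]   (32-bit index register, scaled)
+//   ldr w0, [x1, x2, lsl #2]    (64-bit index register, scaled)
+//   ldr w0, [x1, x2, sxtx #0]   (64-bit index register, unscaled)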
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+ string Rm, string prefix> {
+ def regext_asmoperand : AsmOperandClass {
+ let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+ let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+ let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
+ }
+
+ def regext : Operand<i64> {
+ let PrintMethod
+ = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+ let DecoderMethod = "DecodeAddrRegExtendOperand";
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+ }
+}
+
+multiclass regexts_wx<int MemSize, string prefix> {
+ // Rm is an X-register if LSL or SXTX are specified as the shift.
+ defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+ // Rm is a W-register if UXTW or SXTW are specified as the shift.
+ defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_ : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_ : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// | | B | H | W | X |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str | STRB | STRH | STR | STR |
+// | unsigned ldr | LDRB | LDRH | LDR | LDR |
+// | signed ldr to W | LDRSB | LDRSH | - | - |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// This will instantiate the LDR/STR instructions you'd expect to use for an
+// unsigned datatype (first two rows above) or a floating-point register; this
+// is reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This class covers the basic unsigned or irrelevantly-signed loads and stores,
+// to general-purpose and floating-point registers.
+
+class AddrParams<string prefix> {
+ Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+ Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+ Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+ bit high_opc, string asmsuffix,
+ RegisterClass GPR, AddrParams params> {
+ // Unsigned immediate
+ def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset (four of these: load/store and Wm/Xm).
+ let mayLoad = 1 in {
+ def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ let mayStore = 1 in {
+ def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
+ params.regextWm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
+ params.regextXm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ // Unaligned immediate
+ def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Post-indexed
+ def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+ : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+ : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+ : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+ : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+ : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from an H register
+defm LSFP16
+ : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+ : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+ : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+ : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
+ qword_addrparams>;
+
+//===------------------------------
+// 2.2 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring out. Signed word loads don't fit because there is no
+// W version.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+ string prefix> {
+ // Unsigned offset
+ def w : A64I_LSunsigimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def x : A64I_LSunsigimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset
+ let mayLoad = 1 in {
+ def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+ let mayLoad = 1 in {
+ // Unaligned offset
+ def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+
+ // Post-indexed
+ def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+ } // let mayLoad = 1
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit register, sign-extending to 64-bits.
+def LDRSWx
+ : A64I_LSunsigimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+ "ldrsw\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+ (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+ : A64I_LSunalimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldursw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+ : A64I_LSpostind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.3 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+ "prfm\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+ (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR32:$Rm, dword_Wm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, dword_Xm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "prfum\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+ (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions very much mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants we need to split them out into their own
+// section to avoid instantiation of "ldtr d0, [sp]" etc.
+
+multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
+ string prefix> {
+ def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+
+ def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+
+ def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// STTRB/LDTRB: First define the instructions
+defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
+
+// STTRH/LDTRH
+defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
+
+// STTR/LDTR to/from a W register
+defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
+
+// STTR/LDTR to/from an X register
+defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
+
+// Now a class for the signed instructions that can go to either 32 or 64
+// bits...
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
+ let mayLoad = 1 in {
+ def w : A64I_LSunpriv<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x : A64I_LSunpriv<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+ }
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// LDTRSB
+defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
+// LDTRSH
+defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
+
+// And finally LDTRSW which only goes to 64 bits.
+def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrsw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store non-temporal register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STNP, LDNP
+
+
+// As with the single-register loads and stores above, anything that creates
+// an MCInst (Decoding, selection and AsmParsing) has to know the access size,
+// so each access size again needs its own tablegen Operand.
+
+multiclass offsets_simm7<string MemSize, string prefix> {
+  // The bare signed 7-bit immediate is used in post-indexed instructions,
+  // but, because of the scaling performed, a generic "simm7" operand isn't
+  // appropriate here either.
+ def simm7_asmoperand : AsmOperandClass {
+ let Name = "SImm7_Scaled" # MemSize;
+ let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
+ let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreSImm7_" # MemSize;
+ }
+
+ def simm7 : Operand<i64> {
+ let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
+ }
+}
+
+defm word_ : offsets_simm7<"4", "word_">;
+defm dword_ : offsets_simm7<"8", "dword_">;
+defm qword_ : offsets_simm7<"16", "qword_">;
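+
+// For example "ldp x0, x1, [sp, #16]" uses dword_simm7 and is encoded with
+// SImm7 == 2: the byte offset is divided by the 8-byte access size.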
+
+multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
+ Operand simm7, string prefix> {
+ def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSPoffset<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
+ (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2,
+ GPR64xsp:$Rn,
+ simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
+ "LSFPPair128">;
+
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+ (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+ int size, ValueType VT> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "LogicalImm" # note # size;
+ let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
+ let RenderMethod = "addLogicalImmOperands<" # size # ">";
+ let DiagnosticType = "LogicalSecondSource";
+ }
+
+ def _operand
+ : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printLogicalImmOperand<" # size # ">";
+ let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
+ }
+}
+
+defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
+defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
+
+// The mov versions only differ in assembly parsing, where they
+// exclude values representable with either MOVZ or MOVN.
+defm logical_imm32_mov
+ : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
+defm logical_imm64_mov
+ : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
+
+
+multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i32:$Rd,
+ (opnode i32:$Rn, logical_imm32_operand:$Imm))],
+ NoItinerary>;
+
+ def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i64:$Rd,
+ (opnode i64:$Rn, logical_imm64_operand:$Imm))],
+ NoItinerary>;
+}
+
+defm AND : A64I_logimmSizes<0b00, "and", and>;
+defm ORR : A64I_logimmSizes<0b01, "orr", or>;
+defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
+
+let Defs = [NZCV] in {
+ def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+
+ def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+}
+
+
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
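+
+// For example "mov w0, #0x00ff00ff" is not a valid MOVZ or MOVN immediate,
+// but 0x00ff00ff is a logical immediate (0x00ff replicated across 16-bit
+// elements), so it matches the alias above as "orr w0, wzr, #0x00ff00ff".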
+
+//===----------------------------------------------------------------------===//
+// Logical (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
+
+// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
+// behaves differently for unsigned comparisons, so we defensively only allow
+// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
+// equal to 0" and LLVM gives us this.
+def signed_cond : PatLeaf<(cond), [{
+ return !isUnsignedIntSetCC(N->get());
+}]>;
+
+
+// These instructions share their "shift" operands with add/sub (shifted
+// register instructions). They are defined there.
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
+ bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, opc, 0b10, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, opc, 0b11, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : logical_shifts<prefix # "www", 0b0, opc, N,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
+
+
+defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
+defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
+defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
+defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
+ PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs))>, []>;
+defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (or node:$lhs, (not node:$rhs))>, []>;
+defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (xor node:$lhs, (not node:$rhs))>, []>;
+defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs)),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"tst $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
+
+
+multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 0, Rn = 0b11111 in {
+ def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (rotr ty:$Rm,
+                         !cast<Operand>("ror_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"mvn $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(not ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
+}
+
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
+
+def MOVxx : InstAlias<"mov $Rd, $Rm",
+                      (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def MOVww : InstAlias<"mov $Rd, $Rm",
+                      (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Move wide (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MOVN, MOVZ, MOVK + MOV aliases
+
+// A wide variety of different relocations are needed for variants of these
+// instructions, so it turns out that we need a different operand for all of
+// them.
+multiclass movw_operands<string prefix, string instname, int width> {
+ def _imm_asmoperand : AsmOperandClass {
+    let Name = instname # width # "Shifted";
+ let PredicateMethod = "is" # instname # width # "Imm";
+ let RenderMethod = "addMoveWideImmOperands";
+ let ParserMethod = "ParseImmWithLSLOperand";
+ let DiagnosticType = "MOVWUImm16";
+ }
+
+ def _imm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+ let PrintMethod = "printMoveWideImmOperand";
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
+ dag ins64bit> {
+
+ def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+
+ def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+}
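+
+// For example "movz x0, #0x1234, lsl #16" has UImm16 == 0x1234 and
+// Shift == 0b01 (the hw field counts 16-bit shifts), i.e. FullImm == 0x11234.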
+
+let isMoveImm = 1, isReMaterializable = 1,
+ isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+ defm MOVN : A64I_movwSizes<0b00, "movn",
+ (ins movn32_imm:$FullImm),
+ (ins movn64_imm:$FullImm)>;
+
+ // Some relocations are able to convert between a MOVZ and a MOVN. If these
+ // are applied the instruction must be emitted with the corresponding bits as
+ // 0, which means a MOVZ needs to override that bit from the default.
+ let PostEncoderMethod = "fixMOVZ" in
+ defm MOVZ : A64I_movwSizes<0b10, "movz",
+ (ins movz32_imm:$FullImm),
+ (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+ (ins GPR32:$src, movk32_imm:$FullImm),
+ (ins GPR64:$src, movk64_imm:$FullImm)>;
+
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+ string immpredicate, int width> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "MovAlias";
+ let PredicateMethod
+ = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+ let RenderMethod
+ = "addMoveWideMovAliasOperands<" # width # ", "
+ # "A64Imms::" # immpredicate # ">";
+ }
+
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited
+// to print them at the moment. I believe in this case an "AliasPredicate"
+// method will need to be implemented to allow it, as well as the more
+// generally useful handling of non-register, non-constant operands.
+class movalias<Instruction INST, RegisterClass GPR, Operand operand>
+ : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
+
+def : movalias<MOVZwii, GPR32, movz32_movimm>;
+def : movalias<MOVZxii, GPR64, movz64_movimm>;
+def : movalias<MOVNwii, GPR32, movn32_movimm>;
+def : movalias<MOVNxii, GPR64, movn64_movimm>;
+
+//===----------------------------------------------------------------------===//
+// PC-relative addressing instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADR, ADRP
+
+def adr_label : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
+
+ // This label is a 21-bit offset from PC, unscaled
+ let PrintMethod = "printLabelOperand<21, 1>";
+ let ParserMatchClass = label_asmoperand<21, 1>;
+ let OperandType = "OPERAND_PCREL";
+}
+
+def adrp_label_asmoperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let RenderMethod = "addLabelOperands<21, 4096>";
+ let DiagnosticType = "Label";
+}
+
+def adrp_label : Operand<i64> {
+ let EncoderMethod = "getAdrpLabelOpValue";
+
+ // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
+ let PrintMethod = "printLabelOperand<21, 4096>";
+ let ParserMatchClass = adrp_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+let hasSideEffects = 0 in {
+ def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
+ "adr\t$Rd, $Label", [], NoItinerary>;
+
+ def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
+ "adrp\t$Rd, $Label", [], NoItinerary>;
+}
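+
+// A typical use pairs ADRP with a :lo12: addition, e.g. "adrp x0, var" to
+// compute the 4KB page containing var, followed by "add x0, x0, #:lo12:var"
+// for the low bits.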
+
+//===----------------------------------------------------------------------===//
+// System instructions
+//===----------------------------------------------------------------------===//
+// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
+// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
+
+// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
+def uimm3_asmoperand : AsmOperandClass {
+ let Name = "UImm3";
+ let PredicateMethod = "isUImm<3>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm3";
+}
+
+def uimm3 : Operand<i32> {
+ let ParserMatchClass = uimm3_asmoperand;
+}
+
+// The HINT instruction accepts a simple unsigned 7-bit immediate.
+def uimm7_asmoperand : AsmOperandClass {
+ let Name = "UImm7";
+ let PredicateMethod = "isUImm<7>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm7";
+}
+
+def uimm7 : Operand<i32> {
+ let ParserMatchClass = uimm7_asmoperand;
+}
+
+// The namedimm multiclass was defined earlier, alongside the prefetch
+// operands. Most of these operands fit into the NamedImmMapper scheme well:
+// they either accept a named operand or any immediate under a particular
+// value (which may be 0, implying no immediate is allowed).
+defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
+defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
+defm ic : namedimm<"ic", "A64IC::ICMapper">;
+defm dc : namedimm<"dc", "A64DC::DCMapper">;
+defm at : namedimm<"at", "A64AT::ATMapper">;
+defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
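+
+// For example, DBarrierMapper lets "dmb ish" and "dmb #0xb" parse to the
+// same CRm value; the atomic_fence pattern further down relies on that
+// encoding.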
+
+// However, MRS and MSR are more complicated for a few reasons:
+// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
+// implementation-defined effect
+// * Most registers are shared, but some are read-only or write-only.
+// * There is a variant of MSR which accepts the same register name (SPSel),
+// but which would have a different encoding.
+
+// In principle these could be resolved with more complicated subclasses of
+// NamedImmMapper, but that would impose an overhead on the other "named
+// immediates", both concretely (virtual tables) and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass {
+ let Name = "MRS";
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MRS";
+}
+
+def mrs_op : Operand<i32> {
+ let ParserMatchClass = mrs_asmoperand;
+ let PrintMethod = "printMRSOperand";
+ let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass {
+ let Name = "MSRWithReg";
+
+ // Note that SPSel is valid for both this and the pstate operands, but with
+ // different immediate encodings. This is why these operands provide a string
+ // AArch64Operand rather than an immediate. The overlap is small enough that
+ // it could be resolved with hackery now, but who can say in future?
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def msr_op : Operand<i32> {
+ let ParserMatchClass = msr_asmoperand;
+ let PrintMethod = "printMSROperand";
+ let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass {
+ let Name = "MSRPState";
+ // See comment above about parser.
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def pstate_op : Operand<i32> {
+ let ParserMatchClass = pstate_asmoperand;
+ let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+ let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass {
+ let Name = "CRx";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseCRxOperand";
+ // Diagnostics are handled in all cases by ParseCRxOperand.
+}
+
+def CRx : Operand<i32> {
+ let ParserMatchClass = CRx_asmoperand;
+ let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+ [], NoItinerary> {
+ bits<7> UImm7;
+ let CRm = UImm7{6-3};
+ let Op2 = UImm7{2-0};
+
+ let Op0 = 0b00;
+ let Op1 = 0b011;
+ let CRn = 0b0010;
+ let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, we'll define a helper-class for them.
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+ Operand operand, string asmop>
+ : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+ [], NoItinerary> {
+ let Op0 = op0;
+ let Op1 = op1;
+ let CRn = crn;
+ let Op2 = op2;
+ let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruction, appropriate at least for Linux SMP
+// configurations.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+ uimm3:$Op2, GPR64:$Rt),
+ "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The Xt argument of the generic SYS instruction can be omitted, whether or
+// not doing so makes sense.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+ (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
+
+
+// But many SYS operations have aliases, which obviously don't fit into that
+// scheme.
+class SYSalias<dag ins, string asmstring>
+ : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
+ let isAsmParserOnly = 1;
+
+ bits<14> SysOp;
+ let Op0 = 0b01;
+ let Op1 = SysOp{13-11};
+ let CRn = SysOp{10-7};
+ let CRm = SysOp{6-3};
+ let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
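+// For example "ic ivau, x0" is the architectural alias of
+// "sys #3, c7, c5, #1, x0".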
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+ "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+ "msr\t$SysReg, $Rt", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+ "mrs\t$Rt, $SysReg", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+ "msr\t$PState, $CRm", [], NoItinerary> {
+ bits<6> PState;
+
+ let Op0 = 0b00;
+ let Op1 = PState{5-3};
+ let CRn = 0b0100;
+ let Op2 = PState{2-0};
+ let Rt = 0b11111;
+}
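+// For example, in "msr daifset, #2" (mask IRQs) the pstate_op supplies Op1
+// and Op2, while the #2 immediate goes directly into CRm.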
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
+def uimm6 : Operand<i64> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
+
+def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
+
+def tbimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
+
+ // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<14, 4>";
+ let ParserMatchClass = label_wid14_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
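+// (A 14-bit signed word offset gives TBZ/TBNZ a branch range of +/-32KiB.)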
+
+def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
+def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
+
+// These instructions correspond to patterns involving "and" with a power of
+// two, which we need to be able to select.
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
+def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
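+// For example, "tbz x0, #3, label" tests "(and x0, 0x8)": SelectTSTBOperand
+// recognises the power-of-two mask 0x8 and yields the bit index 3 as $Imm.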
+
+let isBranch = 1, isTerminator = 1 in {
+ def TBZxii : A64I_TBimm<0b0, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>;
+
+ def TBNZxii : A64I_TBimm<0b1, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>;
+
+
+ // Note that these instructions overlap with the 64-bit patterns above. This
+ // is intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" both do
+ // the same thing and are both permitted assembly. They also both have
+ // sensible DAG patterns.
+ def TBZwii : A64I_TBimm<0b0, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+
+ def TBNZwii : A64I_TBimm<0b1, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B, BL
+
+def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
+
+def bimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blimm_target : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
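+// (A 26-bit signed word offset gives B and BL a range of +/-128MiB.)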
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+ : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+ !strconcat(asmop, "\t$Label"), patterns,
+ NoItinerary>;
+
+let isBranch = 1 in {
+ def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ }
+
+ def BLimm : A64I_BimmImpl<0b1, "bl",
+ [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+ let isCall = 1;
+ let Defs = [X30];
+ }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRPS.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin = NoItinerary>
+ : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+ outs, ins, asmstr, patterns, itin> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+ def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
+ "br\t$Rn", [(brind i64:$Rn)]> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ }
+
+ def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+ "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
+ let isBarrier = 0;
+ let isCall = 1;
+ let Defs = [X30];
+ }
+
+ def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+ "ret\t$Rn", []> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ // Create a separate pseudo-instruction for codegen to use so that we don't
+ // flag x30 as used in every function. It'll be restored before the RET by the
+ // epilogue if it's legitimately used.
+ def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ }
+
+ def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// Primary method of address generation for the small/absolute memory model is
+// an ADRP/ADR pair:
+// ADRP x0, some_variable
+// ADD x0, x0, #:lo12:some_variable
+//
+// The load/store elision of the ADD is accomplished when selecting
+// addressing-modes. This just mops up the cases where that doesn't work and we
+// really need an address in some register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
+
+class ADRP_ADD<SDNode Wrapper, SDNode addrop>
+ : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
+ (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
+
+def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
+def : ADRP_ADD<A64WrapperSmall, texternalsym>;
+def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
+def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
+def : ADRP_ADD<A64WrapperSmall, tjumptable>;
+
+//===----------------------------------------------------------------------===//
+// GOT access patterns
+//===----------------------------------------------------------------------===//
+
+// FIXME: Wibble
+
+class GOTLoadSmall<SDNode addrfrag>
+ : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
+ (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
+
+def : GOTLoadSmall<texternalsym>;
+def : GOTLoadSmall<tglobaladdr>;
+def : GOTLoadSmall<tglobaltlsaddr>;
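+// The selected sequence is, for example, "adrp x0, :got:var" followed by
+// "ldr x0, [x0, #:got_lo12:var]"; the literal 8 in the pattern is the
+// guaranteed alignment of the 64-bit GOT slot.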
+
+//===----------------------------------------------------------------------===//
+// Tail call handling
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
+ def TC_RETURNdi
+ : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
+
+ def TC_RETURNxi
+ : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
+ [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ Uses = [XSP] in {
+ def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
+ (Bimm bimm_target:$Label)>;
+
+ def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
+ (BRx GPR64:$Rd)>;
+}
+
+
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+//===----------------------------------------------------------------------===//
+// Thread local storage
+//===----------------------------------------------------------------------===//
+
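+// Reading the thread pointer: TPIDR_EL0 has the system-register encoding
+// op0=0b11, op1=0b011, CRn=0b1101, CRm=0b0000, op2=0b010, which packs into the
+// 16-bit MRS immediate 0xde82 used below.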
+def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
+
+// This is a pseudo-instruction representing the ".tlsdesccall" directive in
+// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
+// current location. It should always be immediately followed by a BLR
+// instruction, and is intended solely for relaxation by the linker.
+
+def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
+ let hasSideEffects = 1;
+}
+
+def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
+ [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
+ let isCall = 1;
+ let Defs = [X30];
+}
+
+def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
+ (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield patterns
+//===----------------------------------------------------------------------===//
+
+def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+}]>;
+
+def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+}]>;
+
+def bfi_width_to_imms : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+}]>;
+
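+// Worked example: inserting a 4-bit field at bit 8 of a w-register is
+// "bfi w0, w1, #8, #4", encoded with immr = (32 - 8) % 32 = 24 and
+// imms = 4 - 1 = 3.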
+
+// The simpler patterns deal with cases where no AND mask is actually needed
+// (either all bits are used or the low 32 bits are used).
+let AddedComplexity = 10 in {
+
+def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIxxii $src, $Rn,
+ (bfi64_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIwwii $src, $Rn,
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+
+def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (i64 4294967295)),
+ (SUBREG_TO_REG (i64 0),
+ (BFIwwii (EXTRACT_SUBREG $src, sub_32),
+ (EXTRACT_SUBREG $Rn, sub_32),
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS))),
+ sub_32)>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous patterns
+//===----------------------------------------------------------------------===//
+
+// Truncation from 64 to 32-bits just involves renaming your register.
+def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
+
+// Similarly, extension where we don't care about the high bits is
+// just a rename.
+def : Pat<(i64 (anyext i32:$val)),
+ (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
+
+// SELECT instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : PseudoInst<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+ [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store patterns
+//===----------------------------------------------------------------------===//
+
+// There are lots of patterns here, because we need to allow at least three
+// parameters to vary independently.
+// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
+// 2. LLVM source: zextloadi8, anyextloadi8, ...
+// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+//
+// The biggest problem turns out to be the address-generation variable. At the
+// point of instantiation we need to produce two DAGs, one for the pattern and
+// one for the instruction. Doing this at the lowest level of classes doesn't
+// work.
+//
+// Consider the simple uimm12 addressing mode, and the desire to match both (add
+// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
+// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
+// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
+// operation, and PatFrags are for selection not output.
+//
+// As a result, the address-generation patterns are the final
+// instantiations. However, we do still need to vary the operand for the address
+// further down (At the point we're deciding A64WrapperSmall, we don't know
+// the memory width of the operation).
+
+//===------------------------------
+// 1. Basic infrastructural defs
+//===------------------------------
+
+// First, some simple classes for !foreach and !subst to use:
+class Decls {
+ dag pattern;
+}
+
+def decls : Decls;
+def ALIGN;
+def INST;
+def OFFSET;
+def SHIFT;
+
+// You can't use !subst on an actual immediate, but you *can* use it on an
+// operand record that happens to match a single immediate. So we do.
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
+def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
+def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
+def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
+def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
+
+// If the low bits of a pointer are known to be 0 then an "or" is just as good
+// as addition for computing an offset. This fragment forwards that check for
+// TableGen's use.
+def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
+[{
+ return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
+}]>;
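+// For example, if x1 is known to be 8-byte aligned then "(or x1, 4)" computes
+// the same value as "(add x1, 4)", so "ldr w0, [x1, #4]" can be selected;
+// isBaseWithConstantOffset performs the known-bits check behind this.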
+
+// Load/store (unsigned immediate) operations with relocations against global
+// symbols (for lo12) are only valid if those symbols have correct alignment
+// (since the immediate offset is divided by the access scale, it can't have a
+// remainder).
+//
+// The guaranteed alignment is provided as part of the WrapperSmall
+// operation, and checked against one of these.
+def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
+def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
+def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
+def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
+def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
+
+// "Normal" load/store instructions can be used on atomic operations, provided
+// the ordering parameter is at most "monotonic". Anything above that needs
+// special handling with acquire/release instructions.
+class simple_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_load_simple_i8 : simple_load<atomic_load_8>;
+def atomic_load_simple_i16 : simple_load<atomic_load_16>;
+def atomic_load_simple_i32 : simple_load<atomic_load_32>;
+def atomic_load_simple_i64 : simple_load<atomic_load_64>;
+
+class simple_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_store_simple_i8 : simple_store<atomic_store_8>;
+def atomic_store_simple_i16 : simple_store<atomic_store_16>;
+def atomic_store_simple_i32 : simple_store<atomic_store_32>;
+def atomic_store_simple_i64 : simple_store<atomic_store_64>;
+
+//===------------------------------
+// 2. UImm12 and SImm9
+//===------------------------------
+
+// These instructions have two operands providing the address so they can be
+// treated similarly for most purposes.
+
+//===------------------------------
+// 2.1 Base patterns covering extend/truncate semantics
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType transty,
+ ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset)>;
+}
+
+// Instructions accessing a memory chunk smaller than a register (or, in a
+// pinch, the same size) have a characteristic set of patterns they want to
+// match: extending loads and truncating stores. This class deals with the
+// sign-neutral version of those patterns.
+//
+// It will be instantiated across multiple addressing-modes.
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset,
+ dag address, ValueType sty>
+ : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+}
+
+// Next come patterns for sign-extending loads.
+multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
+}
+
+// And finally, "natural-width" loads and stores come next.
+multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset)>;
+ def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
+}
+
+// Integer operations also get atomic instructions to select for.
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty>
+ : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
+ ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
+
+//===------------------------------
+// 2.2. Addressing-mode instantiations
+//===------------------------------
+
+multiclass uimm12_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+ defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+ defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ f16>;
+
+ defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ f32>;
+
+ defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ f64>;
+
+ defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, qword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, qword_uimm12,
+ !subst(ALIGN, min_align16, decls.pattern))),
+ f128>;
+
+ defm : load_signed_pats<"B", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+
+ defm : load_signed_pats<"H", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern)))),
+ (LDRSWx Base, !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)))>;
+}
+
+// Straightforward patterns of last resort: a pointer with or without an
+// appropriate offset.
+defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
+defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
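+// After substitution, the i8 instantiation of the pattern above becomes
+// roughly:
+//   Pat<(zextloadi8 (add i64:$Rn, byte_uimm12:$UImm12)),
+//       (LS8_LDR $Rn, $UImm12)>;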
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+ ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+ ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+ (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't need to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 i64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
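+// (simm9 covers the unscaled -256 to +255 byte range of the LDUR/STUR family.)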
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag Extend, dag address,
+ ValueType transty, ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+// The register-offset instructions take three operands to form the address,
+// and have an annoying split between instructions where Rm is 32-bit and
+// where it is 64-bit, so we need a special hierarchy to describe them. Other
+// than that, the same operations should be supported as for the simm9 and
+// uimm12 addressing modes.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+ : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+// And finally, "natural-width" loads and stores come next.
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
+ def : Pat<(store sty:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty>
+ : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
+ ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
+
+multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
+ dag Extend> {
+ defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+ defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+ defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ i64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ f16>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ f32>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ f64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq4, decls.pattern)),
+ f128>;
+
+ defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+
+ defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern))),
+ (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+
+// Finally we're in a position to tell LLVM exactly what addresses are reachable
+// using register-offset instructions. Essentially a base plus a possibly
+// extended, possibly shifted (by access size) offset.
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
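+// Note the pairing of the final operands: each unshifted pattern uses an even
+// code (6 or 2) and its "shl" counterpart the next odd one (7 or 3), so the
+// low bit of this operand appears to record whether the offset is shifted.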
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
new file mode 100644
index 000000000000..c96bf85a716c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -0,0 +1,140 @@
+//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AArch64 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AsmPrinter.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MCOperand
+AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const {
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ OutContext);
+
+ switch (MO.getTargetFlags()) {
+ case AArch64II::MO_GOT:
+ Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOT_LO12:
+ Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_LO12:
+ Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G1:
+ Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL:
+ Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL_LO12:
+ Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC:
+ Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC_LO12:
+ Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G1:
+ Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_NO_FLAG:
+ // Expr is already correct
+ break;
+ default:
+ llvm_unreachable("Unexpected MachineOperand flag");
+ }
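+ // These variant kinds surface in the final assembly as relocation
+ // specifiers, e.g. MO_LO12 prints as ":lo12:sym" and MO_GOT as ":got:sym".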
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers
+ return false;
+ }
+
+ return true;
+}
+
+void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI,
+ AArch64AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 5732fd43cdc2..f45d8f784f42 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//===-- AArch64MachineFunctionInfo.cpp - AArch64 machine function info ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the SPUSelectionDAGInfo class.
+// This file just contains the anchor for the AArch64MachineFunctionInfo to
+// force vtable emission.
//
//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
-#define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUTargetMachine.h"
using namespace llvm;
-SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
-}
+void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
new file mode 100644
index 000000000000..33da54f97fda
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -0,0 +1,149 @@
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AArch64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64MACHINEFUNCTIONINFO_H
+#define AARCH64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private AArch64
+/// target-specific information for each MachineFunction.
+class AArch64MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+ /// The alternative would forbid tail call optimisation in some cases: if we
+ /// want to transfer control from a function with 8-bytes of stack-argument
+ /// space to a function with 16-bytes then misalignment of this value would
+ /// make a stack adjustment necessary, which could not be undone by the
+ /// callee.
+ unsigned BytesInStackArgArea;
+
+ /// The number of bytes to restore to deallocate space for incoming
+ /// arguments. Canonically 0 in the C calling convention, but non-zero when
+ /// callee is expected to pop the args.
+ unsigned ArgumentStackToRestore;
+
+ /// If the stack needs to be adjusted on frame entry in two stages, this
+ /// records the size of the first adjustment just prior to storing
+ /// callee-saved registers. The callee-saved slots are addressed assuming
+ /// SP == <incoming-SP> - InitialStackAdjust.
+ unsigned InitialStackAdjust;
+
+ /// Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// general-purpose registers that might contain variadic parameters.
+ int VariadicGPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the general-purpose registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicGPRIdx to what's stored in __gr_top.
+ unsigned VariadicGPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// floating-point registers that might contain variadic parameters.
+ int VariadicFPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the floating-point registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicFPRIdx to what's stored in __vr_top.
+ unsigned VariadicFPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of an object pointing just past the last known stacked
+ /// argument on entry to a variadic function. This goes into the __stack field
+ /// of the va_list type.
+ int VariadicStackIdx;
+
+ /// The offset of the frame pointer from the stack pointer on function
+ /// entry. This is expected to be negative.
+ int FramePointerOffset;
+
+public:
+ AArch64MachineFunctionInfo()
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ explicit AArch64MachineFunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
+ void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;}
+
+ unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
+ void setArgumentStackToRestore(unsigned bytes) {
+ ArgumentStackToRestore = bytes;
+ }
+
+ unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
+ void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ int getVariadicGPRIdx() const { return VariadicGPRIdx; }
+ void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+
+ unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
+ void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+
+ int getVariadicFPRIdx() const { return VariadicFPRIdx; }
+ void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+
+ unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
+ void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+
+ int getVariadicStackIdx() const { return VariadicStackIdx; }
+ void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+
+ int getFramePointerOffset() const { return FramePointerOffset; }
+ void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
new file mode 100644
index 000000000000..20b0dcf86f46
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -0,0 +1,171 @@
+//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64RegisterInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti)
+ : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
+}
+
+const uint16_t *
+AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_PCS_SaveList;
+}
+
+const uint32_t*
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return CSR_PCS_RegMask;
+}
+
+const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
+ return TLSDesc_RegMask;
+}
+
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::FlagClassRegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+}
+
+
+
+BitVector
+AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(AArch64::XSP);
+ Reserved.set(AArch64::WSP);
+
+ Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WZR);
+
+ if (TFI->hasFP(MF)) {
+ Reserved.set(AArch64::X29);
+ Reserved.set(AArch64::W29);
+ }
+
+ return Reserved;
+}
+
+void
+AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI =
+ static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
+
+ // In order to work out the base and offset for addressing, the FrameLowering
+ // code needs to know (sometimes) whether the instruction is storing/loading a
+ // callee-saved register, or whether it's a more generic
+ // operation. Fortunately the frame indices are used *only* for that purpose
+ // and are contiguous, so we can check here.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+
+ unsigned FrameReg;
+ int64_t Offset;
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
+ IsCalleeSaveOp);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ // DBG_VALUE instructions have no real restrictions so they can be handled
+ // easily.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ int MinOffset, MaxOffset, OffsetScale;
+ if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ OffsetScale = 1;
+ } else {
+ // Load/store of a stack object
+ TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
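+ // For example, a 64-bit LDR takes an unsigned 12-bit immediate scaled by
+ // 8, so OffsetScale = 8, MinOffset = 0 and MaxOffset = 0xfff * 8 = 32760.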
+ }
+
+ // The frame lowering has told us a base and offset it thinks we should use to
+ // access this variable, but it's still up to us to make sure the values are
+ // legal for the instruction in question.
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+ unsigned BaseReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
+ BaseReg, FrameReg, BaseReg, Offset);
+ FrameReg = BaseReg;
+ Offset = 0;
+ }
+
+ // Negative offsets are expected if we address from FP, but for
+ // now this checks nothing has gone horribly wrong.
+ assert(Offset >= 0 && "Unexpected negative offset from SP");
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return AArch64::X29;
+ else
+ return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const AArch64FrameLowering *AFI
+ = static_cast<const AArch64FrameLowering*>(TFI);
+ return AFI->useFPForAddressing(MF);
+}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 000000000000..bb64fd55b2c3
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,76 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *Rs = NULL) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns original class if it is
+ /// possible to copy between a two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 000000000000..bd79546371c5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
+ list<SubRegIndex> inds = []>
+ : AArch64Reg<enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+ def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+ def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+ [!cast<Register>("W"#Index)], [sub_32]>,
+ DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and a black-hole for
+// writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
+def GPR32 : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WZR)> {
+}
+
+def GPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XZR)> {
+}
+
+def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
+ (sequence "W%u", 0, 30)> {
+}
+
+def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
+ (sequence "X%u", 0, 30)> {
+}
+
+// For tail calls, we can't use callee-saved registers or the structure-return
+// register, as they are supposed to be live across function calls and may be
+// clobbered by the epilogue.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 7),
+ (sequence "X%u", 9, 18))> {
+}
+
+
+// Certain addressing-useful instructions accept sp directly. Again the order of
+// registers is important to the Disassembler.
+def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WSP)> {
+}
+
+def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XSP)> {
+}
+
+// Some aliases *only* apply to SP (e.g. MOV uses a different encoding for SP
+// and non-SP variants). We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack registers.
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
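+// E.g. with bare tokens, "add v0.2d, v1.2d, v2.2d" is matched as three
+// V-register operands plus three ".2d" layout tokens, letting the generated
+// matcher enforce the layout constraints.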
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes; keeping them distinct allows the
+// 64-bit and 128-bit views to be treated separately for things like stack
+// spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
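+ // NZCV can't be copied with an ordinary register move (CopyCost = -1) and
+ // must never be handed out by the register allocator.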
+ let CopyCost = -1;
+ let isAllocatable = 0;
+}
diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/AArch64/AArch64Schedule.td
index 3e948d071d63..e17cdaa1f6d2 100644
--- a/lib/Target/CellSPU/SPUMachineFunction.cpp
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -1,4 +1,4 @@
-//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===//
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,4 @@
//
//===----------------------------------------------------------------------===//
-#include "SPUMachineFunction.h"
-
-using namespace llvm;
-
-void SPUFunctionInfo::anchor() { }
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6bbe075a1b61
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 39257d92c400..d412ed2be180 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -1,4 +1,4 @@
-//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//
-#ifndef CELLSPUSELECTIONDAGINFO_H
-#define CELLSPUSELECTIONDAGINFO_H
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
-class SPUTargetMachine;
+class AArch64TargetMachine;
+class AArch64Subtarget;
-class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
public:
- explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
- ~SPUSelectionDAGInfo();
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
};
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 000000000000..d17b73820994
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
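+ // Under static relocation every symbol is addressed directly; otherwise a
+ // symbol that may be preempted at link or load time (non-local, non-hidden)
+ // must be reached through an indirection such as the GOT.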
+ if (RelocM == Reloc::Static)
+ return false;
+
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
new file mode 100644
index 000000000000..2e9205fc9924
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 000000000000..df599d599dd6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,81 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the AArch64TargetMachine methods,
+// principally just setting up the passes needed to generate correct code on
+// this architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
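+ // Little-endian ("e"), 64-bit pointers, i64/i128 at natural alignment,
+ // native 32- and 64-bit integers, and a 128-bit-aligned stack ("S128").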
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64BranchFixupPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 000000000000..c1f47c2e5372
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64TargetMachine : public LLVMTargetMachine {
+ AArch64Subtarget Subtarget;
+ AArch64InstrInfo InstrInfo;
+ const DataLayout DL;
+ AArch64TargetLowering TLInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64FrameLowering FrameLowering;
+
+public:
+ AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ const AArch64InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const DataLayout *getDataLayout() const { return &DL; }
+
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 000000000000..b4452f514590
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64-specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+void
+AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 000000000000..bf0565a79ec8
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,31 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64-specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// AArch64LinuxTargetObjectFile - This implementation is used for linux
+ /// AArch64.
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 000000000000..69bb80a48537
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2197 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the (GNU-style) assembly parser for the AArch64
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+class AArch64AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+ enum AArch64MatchResultTy {
+ Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "AArch64GenAsmMatcher.inc"
+ };
+
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ // These are the public interface of the MCTargetAsmParser
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirectiveTLSDescCall(SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ // The rest of the sub-parsers have more freedom over their interfaces: they
+ // return an OperandMatchResultTy because it's less ambiguous than true/false
+ // or -1/0/1, even if it is more verbose.
+ OperandMatchResultTy
+ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+ OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+ OperandMatchResultTy
+ ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes);
+
+ OperandMatchResultTy
+ ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes);
+
+ OperandMatchResultTy
+ ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ template<typename SomeNamedImmMapper> OperandMatchResultTy
+ ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+ }
+
+ OperandMatchResultTy
+ ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ /// Scan the next token (which had better be an identifier) and determine
+ /// whether it represents a general-purpose or vector register. It returns
+ /// true if an identifier was found and populates its reference arguments. It
+ /// does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+ k_Token, // The mnemonic; other raw tokens the auto-generated matcher needs
+ k_WrappedRegister // Load/store exclusive permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ union {
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+ bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+ bool isAddSubImmLSL0() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 0) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ bool isAddSubImmLSL12() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 12) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+ if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+ return false;
+
+ if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+ return false;
+
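+ // The only legal amounts scale the index by the access size or leave it
+ // unscaled: e.g. for an 8-byte access, "[x0, x1, lsl #3]" and "[x0, x1]".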
+ return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ }
+
+ bool isAdrpLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOT
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+ }
+
+ return isLabel<21, 4096>();
+ }
+
+ template<unsigned RegWidth> bool isBitfieldWidth() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ template<int RegWidth>
+ bool isCVTFixedPos() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ bool isFMOVImm() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ return A64Imms::isFPImm(RealVal, ImmVal);
+ }
+
+ bool isFPZero() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ return RealVal.isPosZero();
+ }
+
+ template<unsigned field_width, unsigned scale>
+ bool isLabel() const {
+ if (!isImm()) return false;
+
+ if (isa<MCSymbolRefExpr>(Imm.Val)) {
+ return true;
+ } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
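+ // E.g. a 19-bit field scaled by 4 (a load-literal) covers [-2^20, 2^20 - 4].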
+ int64_t Min = - (scale * (1LL << (field_width - 1)));
+ int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
+ return (Val % scale) == 0 && Val >= Min && Val <= Max;
+ }
+
+ // N.b. this disallows explicit relocation specifications via an
+ // AArch64MCExpr: operands needing that behaviour are rejected here rather
+ // than accepted as labels.
+ return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ }
+
+ bool isMOVN32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVN64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+
+ bool isMOVZ32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_ABS_G2,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMoveWideImm(unsigned RegWidth,
+ AArch64MCExpr::VariantKind *PermittedModifiers,
+ unsigned NumModifiers) const {
+ if (!isImmWithLSL()) return false;
+
+ if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+ if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+ AArch64MCExpr::VariantKind Modifier;
+ if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+ // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+ if (!ImmWithLSL.ImplicitAmount) return false;
+
+ for (unsigned i = 0; i < NumModifiers; ++i)
+ if (PermittedModifiers[i] == Modifier) return true;
+
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ }
+
+ template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+ bool isMoveWideMovAlias() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int UImm16, Shift;
+ uint64_t Value = CE->getValue();
+
+ // If this is a 32-bit instruction then all bits above bit 31 should be the
+ // same: either all-zero or all-one is fine, because both signed and
+ // unsigned values should be permitted.
+ if (RegWidth == 32) {
+ if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ return false;
+
+ Value &= 0xffffffffULL;
+ }
+
+ return isValidImm(RegWidth, Value, UImm16, Shift);
+ }
+
+ bool isMSRWithReg() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMSRPState() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMRS() const {
+ if (!isSysReg()) return false;
+
+ // Check against the registers known to be readable via MRS.
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isPRFM() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ if (!CE)
+ return false;
+
+ return CE->getValue() >= 0 && CE->getValue() <= 31;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return ShiftExtend.Amount <= 4;
+ }
+
+ bool isRegExtendLSL() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != A64SE::LSL)
+ return false;
+
+ return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ }
+
+ template<int MemSize> bool isSImm7Scaled() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int64_t Val = CE->getValue();
+ if (Val % MemSize != 0) return false;
+
+ Val /= MemSize;
+
+ return Val >= -64 && Val < 64;
+ }
+
+ template<int BitWidth>
+ bool isSImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= -(1LL << (BitWidth - 1))
+ && CE->getValue() < (1LL << (BitWidth - 1));
+ }
+
+ template<int bitWidth>
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ }
+
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ return isa<MCConstantExpr>(getImm());
+ }
+
+ static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
+ unsigned ShiftAmount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
+ Op->ImmWithLSL.Val = Val;
+ Op->ImmWithLSL.ShiftAmount = ShiftAmount;
+ Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
+ Op->CondCode.Code = Code;
+ return Op;
+ }
+
+ static AArch64Operand *CreateFPImm(double Val,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
+ Op->FPImm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
+ unsigned Amount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
+ Op->ShiftExtend.ShiftType = ShiftTyp;
+ Op->ShiftExtend.Amount = Amount;
+ Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ template<unsigned RegWidth>
+ void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
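+ // BFI is an alias of BFM, whose immr field encodes (-lsb) mod RegWidth;
+ // e.g. a 32-bit BFI at lsb 8 encodes (32 - 8) % 32 == 24.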
+ unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
+ Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ }
+
+ void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+
+ Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ }
+
+ void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ }
+
+ void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ A64Imms::isFPImm(RealVal, ImmVal);
+
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ }
+
+ void addFPZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
+
+ void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Encoded = A64InvertCondCode(getCondCode());
+ Inst.addOperand(MCOperand::CreateImm(Encoded));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ template<int MemSize>
+ void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue() / MemSize;
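+ // The scaled offset is signed: masking to 7 bits keeps its two's-complement
+ // encoding for the imm7 field.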
+ Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ }
+
+ template<int BitWidth>
+ void addSImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ }
+
+ void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
+ assert (N == 1 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+ }
+
+ template<unsigned field_width, unsigned scale>
+ void addLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+
+ if (!CE) {
+ addExpr(Inst, Imm.Val);
+ return;
+ }
+
+ int64_t Val = CE->getValue();
+ assert(Val % scale == 0 && "Unaligned immediate in instruction");
+ Val /= scale;
+
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ }
+
+ template<int MemSize>
+ void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
+ } else {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+ }
+
+ template<unsigned RegWidth>
+ void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ uint32_t Bits;
+ A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMRSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+
+ AArch64MCExpr::VariantKind Variant;
+ if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
+ return;
+ }
+
+ // We know it's relocated
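+ // The relocation variant fixes which 16-bit chunk is addressed, so it also
+ // determines the hw shift field: g0 means lsl #0, g1 #16, g2 #32, g3 #48.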
+ switch (Variant) {
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ Inst.addOperand(MCOperand::CreateImm(1));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ Inst.addOperand(MCOperand::CreateImm(2));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ Inst.addOperand(MCOperand::CreateImm(3));
+ break;
+ default: llvm_unreachable("Inappropriate move wide relocation");
+ }
+ }
+
+ template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
+ void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int UImm16, Shift;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+
+ if (RegWidth == 32) {
+ Value &= 0xffffffffULL;
+ }
+
+ bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
+ (void)Valid;
+ assert(Valid && "Invalid immediates should have been weeded out by now");
+
+ Inst.addOperand(MCOperand::CreateImm(UImm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ }
+
+ void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ assert(CE->getValue() >= 0 && CE->getValue() <= 31
+ && "PRFM operand should be 5-bits");
+
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ // For Add-sub (extended register) operands.
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+
+ // For the extend in load-store (register offset) instructions.
+ template<unsigned MemSize>
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
+ addAddrRegExtendOperands(Inst, N, MemSize);
+ }
+
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
+ unsigned MemSize) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // First bit of Option is set in instruction classes, the high two bits are
+ // as follows:
+ unsigned OptionHi = 0;
+ switch (ShiftExtend.ShiftType) {
+ case A64SE::UXTW:
+ case A64SE::LSL:
+ OptionHi = 1;
+ break;
+ case A64SE::SXTW:
+ case A64SE::SXTX:
+ OptionHi = 3;
+ break;
+ default:
+ llvm_unreachable("Invalid extend type for register offset");
+ }
+
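+ // The S bit says whether the index is scaled by the access size: for byte
+ // accesses an explicit "#0" still sets it, while for wider accesses any
+ // non-zero amount (already validated to be log2(MemSize)) does.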
+ unsigned S = 0;
+ if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
+ S = 1;
+ else if (MemSize != 1 && ShiftExtend.Amount != 0)
+ S = 1;
+
+ Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ }
+
+ void addShiftOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+};
+
+} // end anonymous namespace.
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+
+ // See if the operand has a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // It could either succeed, fail or just not care.
+ if (ResTy != MatchOperand_NoMatch)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ case AsmToken::Identifier: {
+ // It might be in the LSL/UXTB family ...
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // ... or it might be a register ...
+ uint32_t NumLanes = 0;
+ OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
+ assert(GotReg != MatchOperand_ParseFail
+ && "register parsing shouldn't partially succeed");
+
+ if (GotReg == MatchOperand_Success) {
+ if (Parser.getTok().is(AsmToken::LBrac))
+ return ParseNEONLane(Operands, NumLanes);
+ else
+ return MatchOperand_Success;
+ }
+
+ // ... or it might be a symbolish thing
+ }
+ // Fall through
+ case AsmToken::LParen: // E.g. (strcmp-4)
+ case AsmToken::Integer: // 1f, 2b labels
+ case AsmToken::String: // quoted labels
+ case AsmToken::Dot: // . is Current location
+ case AsmToken::Dollar: // $ is PC
+ case AsmToken::Colon: {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Hash: { // Immediates
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+ Parser.Lex();
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return ParseOperand(Operands, Mnemonic);
+ }
+ // The following will likely be useful later, but not in very early cases
+ case AsmToken::LCurly: // Weird SIMD lists
+ llvm_unreachable("Don't know how to deal with '{' in operand");
+ return MatchOperand_ParseFail;
+ }
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
+ if (getLexer().is(AsmToken::Colon)) {
+ AArch64MCExpr::VariantKind RefKind;
+
+ OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return MatchOperand_ParseFail;
+
+ ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
+ return MatchOperand_Success;
+ }
+
+ // No weird AArch64MCExpr prefix
+ return getParser().parseExpression(ExprVal)
+ ? MatchOperand_ParseFail : MatchOperand_Success;
+}
+
+// A lane attached to a NEON register. "[N]", which should yield three tokens:
+// '[', N, ']'. A hash is not allowed to precede the immediate here.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "expected lane number");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().getIntVal() >= NumLanes) {
+ Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
+ getContext());
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat actual lane
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex(); // Eat ']'
+
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
+ assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
+ .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
+ .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
+ .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
+ .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
+ .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
+ .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
+ .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
+ .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
+ .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
+ .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
+ .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
+ .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
+ .Default(AArch64MCExpr::VK_AARCH64_None);
+
+ if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat identifier
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(),
+ "expected ':' after relocation specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmWithLSLOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ const MCExpr *Imm;
+ if (ParseImmediate(Imm) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Eat ','
+ Parser.Lex();
+
+ // The optional operand must be "lsl #N" where N is non-negative.
+ if (Parser.getTok().is(AsmToken::Identifier)
+ && Parser.getTok().getIdentifier().lower() == "lsl") {
+ Parser.Lex();
+
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+ }
+  }
+
+  // Make sure we do actually have an integer to read the shift amount from;
+  // anything else after the 'lsl' is an error.
+  if (Parser.getTok().isNot(AsmToken::Integer)) {
+    Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+    return MatchOperand_ParseFail;
+  }
+
+  int64_t ShiftAmount = Parser.getTok().getIntVal();
+
+ if (ShiftAmount < 0) {
+ Error(Parser.getTok().getLoc(), "positive shift amount required");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the number
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
+ false, S, E));
+ return MatchOperand_Success;
+}
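+
+// This accepts forms such as the following; a bare "#imm" (no comma) gets an
+// implicit shift of zero:
+//   movz x0, #0x1234, lsl #16
+//   add  x0, x1, #0x123, lsl #12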
+
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCondCodeOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Tok = Parser.getTok().getIdentifier();
+ A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+
+ if (CondCode == A64CC::Invalid)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat condition code
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
+ return MatchOperand_Success;
+}
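+
+// Condition code operands appear in instructions such as:
+//   csel x0, x1, x2, ne
+//   ccmp x3, #4, #8, eq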
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCRxOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerTok = Parser.getTok().getIdentifier().lower();
+ StringRef Tok(LowerTok);
+ if (Tok[0] != 'c') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
+
+ Parser.Lex();
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ return MatchOperand_Success;
+}
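+
+// CRx operands are the Cn/Cm fields of the generic SYS instruction, e.g.:
+//   sys #0, c7, c5, #0        // the generic spelling of "ic iallu"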
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseFPImmOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ bool Negative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat '-'
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat '+'
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Real)) {
+ Error(S, "Expected floating-point immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
+ if (Negative) RealVal.changeSign();
+ double DblVal = RealVal.convertToDouble();
+
+ Parser.Lex(); // Eat real number
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
+ return MatchOperand_Success;
+}
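+
+// Floating-point immediate operands look like:
+//   fmov d0, #0.5
+//   fmov s1, #-1.0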
+
+
+// Automatically generated
+static unsigned MatchRegisterName(StringRef Name);
+
+bool
+AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout,
+ SMLoc &LayoutLoc) const {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return false;
+
+ std::string LowerReg = Tok.getString().lower();
+ size_t DotPos = LowerReg.find('.');
+
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
+ SMLoc S = Tok.getLoc();
+ RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+
+ if (DotPos == StringRef::npos) {
+ Layout = StringRef();
+ } else {
+    // Everything afterwards needs to be a literal token, expected to be
+    // '.2d', '.b' etc. for vector registers.
+
+ // This StringSwitch validates the input and (perhaps more importantly)
+ // gives us a permanent string to use in the token (a pointer into LowerReg
+ // would go out of scope when we return).
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
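+
+// So, for example, "x3" identifies X3 with no layout, "ip0" is accepted as an
+// alias for X16, and "v0.8h" identifies V0 with the ".8h" vector layout.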
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier. Eat it
+
+ SMLoc E = Parser.getTok().getLoc();
+ const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
+ Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
+ return MatchOperand_Success;
+ } else if (Tok.is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ const MCExpr *ImmVal;
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
+ Error(S, "Invalid immediate for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
+ return MatchOperand_Success;
+ }
+
+ Error(S, "unexpected operand for instruction");
+ return MatchOperand_ParseFail;
+}
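+
+// Named immediate operands accept either the symbolic name or a raw
+// immediate, so both of these parse to the same operand:
+//   dmb ish
+//   dmb #0xb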
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseSysRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Any MSR/MRS operand will be an identifier, and we want to store it as some
+ // kind of string: SPSel is valid for two different forms of MSR with two
+ // different encodings. There's no collision at the moment, but the potential
+ // is there.
+ if (!Tok.is(AsmToken::Identifier)) {
+ return MatchOperand_NoMatch;
+ }
+
+ SMLoc S = Tok.getLoc();
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
+ Parser.Lex(); // Eat identifier
+
+ return MatchOperand_Success;
+}
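+
+// System register operands are kept as strings until matching, e.g.:
+//   mrs x0, ctr_el0
+//   msr spsel, #1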
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseLSXAddressOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ unsigned RegNum;
+ SMLoc RegEndLoc, LayoutLoc;
+ StringRef Layout;
+  if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
+      || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
+      || Layout.size() != 0) {
+ // Check Layout.size because we don't want to let "x3.4s" or similar
+ // through.
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat register
+
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // We're done
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Otherwise, only ", #0" is valid
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat ','
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '#'
+
+  if (Parser.getTok().isNot(AsmToken::Integer)
+      || Parser.getTok().getIntVal() != 0) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '0'
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+}
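+
+// So both of these load/store-exclusive addresses are accepted:
+//   ldxr w0, [x1]
+//   ldxr w0, [x1, #0]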
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseShiftExtend(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ std::string LowerID = IDVal.lower();
+
+ A64SE::ShiftExtSpecifiers Spec =
+ StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
+ .Case("lsl", A64SE::LSL)
+ .Case("lsr", A64SE::LSR)
+ .Case("asr", A64SE::ASR)
+ .Case("ror", A64SE::ROR)
+ .Case("uxtb", A64SE::UXTB)
+ .Case("uxth", A64SE::UXTH)
+ .Case("uxtw", A64SE::UXTW)
+ .Case("uxtx", A64SE::UXTX)
+ .Case("sxtb", A64SE::SXTB)
+ .Case("sxth", A64SE::SXTH)
+ .Case("sxtw", A64SE::SXTW)
+ .Case("sxtx", A64SE::SXTX)
+ .Default(A64SE::Invalid);
+
+ if (Spec == A64SE::Invalid)
+ return MatchOperand_NoMatch;
+
+ // Eat the shift
+ SMLoc S, E;
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
+ Spec != A64SE::ASR && Spec != A64SE::ROR) {
+ // The shift amount can be omitted for the extending versions, but not real
+ // shifts:
+ // add x0, x0, x0, uxtb
+ // is valid, and equivalent to
+ // add x0, x0, x0, uxtb #0
+
+    if (Parser.getTok().is(AsmToken::Comma) ||
+        Parser.getTok().is(AsmToken::EndOfStatement) ||
+        Parser.getTok().is(AsmToken::RBrac)) {
+      E = Parser.getTok().getLoc();
+      Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true,
+                                                           S, E));
+      return MatchOperand_Success;
+    }
+ }
+
+ // Eat # at beginning of immediate
+ if (!Parser.getTok().is(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(),
+ "expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Amount = Parser.getTok().getIntVal();
+ Parser.Lex();
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false,
+ S, E));
+
+ return MatchOperand_Success;
+}
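+
+// Examples of the shift/extend operands parsed above:
+//   add x0, x1, x2, lsl #3
+//   add x0, x1, w2, uxtb        // extend with implicit amount of 0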
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool AArch64AsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case AArch64::BFIwwii:
+ case AArch64::BFIxxii:
+ case AArch64::SBFIZwwii:
+ case AArch64::SBFIZxxii:
+ case AArch64::UBFIZwwii:
+ case AArch64::UBFIZxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+
+ if (ImmR != 0 && ImmS >= ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested insert overflows register");
+ }
+ return false;
+ }
+ case AArch64::BFXILwwii:
+ case AArch64::BFXILxxii:
+ case AArch64::SBFXwwii:
+ case AArch64::SBFXxxii:
+ case AArch64::UBFXwwii:
+ case AArch64::UBFXxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+ int64_t RegWidth = 0;
+ switch (Inst.getOpcode()) {
+ case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
+ RegWidth = 64;
+ break;
+ case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
+ RegWidth = 32;
+ break;
+ }
+
+ if (ImmS >= RegWidth || ImmS < ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested extract overflows register");
+ }
+ return false;
+ }
+ case AArch64::ICix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (!A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::ICi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op requires a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (!A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op requires a register");
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
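+
+// For example, "bfi w0, w1, #4, #29" arrives here as BFIwwii with ImmR == 28
+// and ImmS == 28 (lsb 4 + width 29 exceeds the 32-bit register), so it is
+// rejected with "requested insert overflows register".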
+
+
+// Parses the instruction *together with* all operands, appending each parsed
+// operand to the "Operands" list
+bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ size_t CondCodePos = Name.find('.');
+
+ StringRef Mnemonic = Name.substr(0, CondCodePos);
+ Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
+
+ if (CondCodePos != StringRef::npos) {
+ // We have a condition code
+ SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
+ StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos);
+ A64CC::CondCodes Code;
+
+ Code = A64StringToCondCode(CondStr);
+
+ if (Code == A64CC::Invalid) {
+ Error(S, "invalid condition code");
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
+
+ Operands.push_back(AArch64Operand::CreateToken(".", DotL));
+ SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
+ Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ }
+
+ // Now we parse the operands of this instruction
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ // After successfully parsing some operands there are two special cases to
+ // consider (i.e. notional operands not separated by commas). Both are due
+ // to memory specifiers:
+ // + An RBrac will end an address for load/store/prefetch
+ // + An '!' will indicate a pre-indexed operation.
+ //
+ // It's someone else's responsibility to make sure these tokens are sane
+ // in the given context!
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Parser.Lex();
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "expected comma before next operand");
+ }
+
+ // Eat the EndOfStatement
+ Parser.Lex();
+
+ return false;
+}
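+
+// For example, "b.ne target" is split into the token "b", the token "." and a
+// condition-code operand, while "ldr x0, [x1, #8]!" picks up "]" and "!" as
+// trailing tokens after the final operand.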
+
+bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".hword")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".xword")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ else if (IDVal == ".tlsdesccall")
+ return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
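+
+// Accepted forms, emitting 2, 4 and 8 bytes per value respectively:
+//   .hword 0x1234
+//   .word  label, label+4
+//   .xword 0xdeadbeef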
+
+// ParseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
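+
+// .tlsdesccall marks the BLR of a TLS descriptor sequence so the relocation
+// can be attached, as in:
+//   adrp x0, :tlsdesc:var
+//   ldr  x1, [x0, #:tlsdesc_lo12:var]
+//   add  x0, x0, #:tlsdesc_lo12:var
+//   .tlsdesccall var
+//   blr  x1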
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+
+ case Match_AddSubRegExtendSmall:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegExtendLarge:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegShift32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
+ case Match_AddSubRegShift64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
+ case Match_AddSubSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register, symbol or integer in range [0, 4095]");
+ case Match_CVTFixedPos32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_CVTFixedPos64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_CondCode:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected AArch64 condition code");
+ case Match_FPImm:
+ // Any situation which allows a nontrivial floating-point constant also
+ // allows a register.
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or floating-point constant");
+ case Match_FPZero:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected floating-point constant #0.0");
+ case Match_Label:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected label or encodable integer pc offset");
+ case Match_Lane1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected lane specifier '[1]'");
+ case Match_LoadStoreExtend32_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0");
+ case Match_LoadStoreExtend32_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend32_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend32_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend32_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtw' with optional shift of #0 or #4");
+ case Match_LoadStoreExtend64_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0");
+ case Match_LoadStoreExtend64_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend64_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend64_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend64_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+ case Match_LoadStoreSImm7_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 4 in range [-256, 252]");
+ case Match_LoadStoreSImm7_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 8 in range [-512, 508]");
+ case Match_LoadStoreSImm7_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 16 in range [-1024, 1016]");
+ case Match_LoadStoreSImm9:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [-256, 255]");
+ case Match_LoadStoreUImm12_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 4095]");
+ case Match_LoadStoreUImm12_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 8190]");
+ case Match_LoadStoreUImm12_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 16380]");
+ case Match_LoadStoreUImm12_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 32760]");
+ case Match_LoadStoreUImm12_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 65520]");
+ case Match_LogicalSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or logical immediate");
+ case Match_MOVWUImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected relocated symbol or integer in range [0, 65535]");
+ case Match_MRS:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected readable system register");
+ case Match_MSR:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected writable system register or pstate");
+ case Match_NamedImm_at:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
+ case Match_NamedImm_dbarrier:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or symbolic barrier operand");
+ case Match_NamedImm_dc:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'dc' operand");
+ case Match_NamedImm_ic:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
+ case Match_NamedImm_isb:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or 'sy'");
+ case Match_NamedImm_prefetch:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
+ case Match_NamedImm_tlbi:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected translation buffer invalidation operand");
+ case Match_UImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 65535]");
+ case Match_UImm3:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_UImm4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_UImm5:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_UImm6:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
+ case Match_UImm7:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 127]");
+ case Match_Width32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 31]");
+ case Match_Width64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 63]");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..a018a0aa7b36
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmParser
+ AArch64AsmParser.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..bd1fcaf1ffe8
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmParser
+parent = AArch64
+required_libraries = AArch64Desc AArch64Info MC MCParser Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile
new file mode 100644
index 000000000000..56c9ef52ea58
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmParser
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
new file mode 100644
index 000000000000..8164d6f73c97
--- /dev/null
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -0,0 +1,36 @@
+set(LLVM_TARGET_DEFINITIONS AArch64.td)
+
+tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(AArch64CommonTableGen)
+
+add_llvm_target(AArch64CodeGen
+ AArch64AsmPrinter.cpp
+ AArch64BranchFixupPass.cpp
+ AArch64FrameLowering.cpp
+ AArch64ISelDAGToDAG.cpp
+ AArch64ISelLowering.cpp
+ AArch64InstrInfo.cpp
+ AArch64MachineFunctionInfo.cpp
+ AArch64MCInstLower.cpp
+ AArch64RegisterInfo.cpp
+ AArch64SelectionDAGInfo.cpp
+ AArch64Subtarget.cpp
+ AArch64TargetMachine.cpp
+ AArch64TargetObjectFile.cpp
+ )
+
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
\ No newline at end of file
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
new file mode 100644
index 000000000000..12c1b8f4c81a
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -0,0 +1,803 @@
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions necessary to decode AArch64 instruction
+// bitpatterns into MCInsts (with the help of TableGenerated information from
+// the instruction definitions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-disassembler"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// AArch64 disassembler for all AArch64 platforms.
+class AArch64Disassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ /// Initializes the disassembler.
+ ///
+ AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
+ : MCDisassembler(STI), RegInfo(Info) {
+ }
+
+ ~AArch64Disassembler() {
+ }
+
+ /// See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+
+}
+
+// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus
+DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
+ llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static bool Check(DecodeStatus &Out, DecodeStatus In);
+
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+  // Encoded as a little-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ Size = 0;
+ return MCDisassembler::Fail;
+}
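+
+// As a worked example, the byte sequence 0x20 0x00 0x80 0xd2 becomes the word
+// 0xd2800020, which decodes as "movz x0, #1".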
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
+ return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+}
+
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder) {
+ // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
+ // S}. Hence we want to check bit 1.
+ if (!(OptionHiS & 2))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(OptionHiS));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+  // In the 32-bit variant, the top bit of the 6-bit immediate must be zero,
+  // i.e. the immediate must be between 0 and 31.
+ if (Imm6Bits > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
+ if (Imm6Bits < 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder) {
+ // Any bits are valid in the instruction (they're architecturally ignored),
+ // but a code generator should insert 0.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm16 = FullImm & 0xffff;
+ unsigned Shift = FullImm >> 16;
+
+ if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ return MCDisassembler::Success;
+}
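+
+// E.g. "movz x0, #1, lsl #16" encodes hw == 1 and imm16 == 1, so FullImm is
+// 0x10001 and the operands produced are Imm16 = 1, Shift = 1. Shifts of 32 or
+// 48 only exist for the 64-bit variants, hence the RegWidth check above.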
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ uint64_t Imm;
+ if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values 0-4 are valid for this 3-bit field
+ if (ShiftAmount > 4)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values below 32 are valid for a 32-bit register
+ if (ShiftAmount > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
+ unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
+ unsigned SF = fieldFromInstruction(Insn, 31, 1);
+
+ // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
+ // out assertions that it thinks should never be hit.
+ enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
+ Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
+
+ if (!SF) {
+ // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
+ if (ImmR > 31 || ImmS > 31)
+ return MCDisassembler::Fail;
+ }
+
+ if (SF) {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // ASR and LSR have more specific patterns so they won't get here:
+ assert(!(ImmS == 31 && !SF && Opc != BFM)
+ && "shift should have used auto decode");
+ assert(!(ImmS == 63 && SF && Opc != BFM)
+ && "shift should have used auto decode");
+
+ // Extension instructions similarly:
+ if (Opc == SBFM && ImmR == 0) {
+ assert((ImmS != 7 && ImmS != 15) && "extension got here");
+ assert((ImmS != 31 || SF == 0) && "extension got here");
+ } else if (Opc == UBFM && ImmR == 0) {
+ assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
+ }
+
+ if (Opc == UBFM) {
+ // It might be a LSL instruction, which actually takes the shift amount
+ // itself as an MCInst operand.
+ if (SF && (ImmS + 1) % 64 == ImmR) {
+ Inst.setOpcode(AArch64::LSLxxi);
+ Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
+ return MCDisassembler::Success;
+ } else if (!SF && (ImmS + 1) % 32 == ImmR) {
+ Inst.setOpcode(AArch64::LSLwwi);
+ Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
+ return MCDisassembler::Success;
+ }
+ }
+
+ // Otherwise it's definitely either an extract or an insert depending on which
+ // of ImmR or ImmS is larger.
+ unsigned ExtractOp, InsertOp;
+ switch (Opc) {
+ default: llvm_unreachable("unexpected instruction trying to decode bitfield");
+ case SBFM:
+ ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
+ InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
+ break;
+ case BFM:
+ ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
+ InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
+ break;
+ case UBFM:
+ ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
+ InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
+ break;
+ }
+
+ // Otherwise it's a boring insert or extract
+ Inst.addOperand(MCOperand::CreateImm(ImmR));
+ Inst.addOperand(MCOperand::CreateImm(ImmS));
+
+ if (ImmS < ImmR)
+ Inst.setOpcode(InsertOp);
+ else
+ Inst.setOpcode(ExtractOp);
+
+ return MCDisassembler::Success;
+}
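+
+// Worked example: a 64-bit UBFM with ImmR == 60 and ImmS == 59 satisfies
+// (ImmS + 1) % 64 == ImmR, so it decodes as "lsl x0, x1, #4"
+// (shift == 63 - ImmS).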
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // This decoder exists to add the dummy Lane operand to the MCInst, which must
+ // be 1 in assembly but has no other real manifestation.
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
+
+ if (IsToVec) {
+ DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // Add the lane
+ Inst.addOperand(MCOperand::CreateImm(1));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus Result = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
+ unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
+ unsigned L = fieldFromInstruction(Insn, 22, 1);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Opc = fieldFromInstruction(Insn, 30, 2);
+
+ // Not an official name, but it turns out that bit 23 distinguishes indexed
+ // from non-indexed operations.
+ unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
+
+ if (Indexed && L == 0) {
+ // The MCInst for an indexed store has an out operand and 4 ins:
+ // Rn_wb, Rt, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // You shouldn't load to the same register twice in an instruction...
+ if (L && Rt == Rt2)
+ Result = MCDisassembler::SoftFail;
+
+ // ... or do any operation that writes-back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ Result = MCDisassembler::SoftFail;
+
+ // Exactly how we decode the MCInst's registers depends on the Opc and V
+ // fields of the instruction. These also obviously determine the size of the
+ // operation so we can fill in that information while we're at it.
+ if (V) {
+ // The instruction operates on the FP/SIMD registers
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ } else {
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ assert(L && "unexpected \"store signed\" attempt");
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Indexed && L == 1) {
+    // The MCInst for an indexed load has 3 out operands and 2 ins:
+    //    Rt, Rt2, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(SImm7));
+
+ return Result;
+}
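+
+// Examples of the SoftFail cases above: "ldp x0, x0, [x1]" loads the same
+// register twice, and "ldp x0, x1, [x0], #16" writes back to a transfer
+// register; both decode but are flagged unpredictable.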
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Val, 0, 5);
+ unsigned Rn = fieldFromInstruction(Val, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
+ unsigned MemSize = fieldFromInstruction(Val, 30, 2);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (Rt == Rt2) S = MCDisassembler::SoftFail;
+
+ switch (MemSize) {
+ case 2:
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case 3:
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
+ }
+
+ if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ SomeNamedImmMapper Mapper;
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+ if (ValidNamed || Mapper.validImm(Val)) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+ unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+ if (Opc == 0 || (V == 1 && Opc == 2)) {
+ // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ if (V == 0 && (Opc == 2 || Size == 3)) {
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 0) {
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 1 && (Opc & 2)) {
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ } else {
+ switch (Size) {
+ case 0:
+ DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Opc != 0 && (V != 1 || Opc != 2)) {
+ // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+ // N.b. The official documentation says unpredictable if Rt == Rn, but this
+ // applies at the architectural rather than the encoding level:
+ //
+ // "STR xzr, [sp], #4" is perfectly valid.
+ if (V == 0 && Rt == Rn && Rn != 31)
+ return MCDisassembler::SoftFail;
+ else
+ return MCDisassembler::Success;
+}
+
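The writeback-hazard check at the end of DecodeSingleIndexedInstruction can be summarised in a small standalone sketch (relying only on the register numbering above: 31 encodes both xzr and sp):

#include <cassert>

// Mirrors the SoftFail condition above: a GPR load/store that writes back
// into its own transfer register is unpredictable, except that register 31
// names xzr as a transfer register but sp as a base.
static bool isUnpredictableWriteback(unsigned V, unsigned Rt, unsigned Rn) {
  return V == 0 && Rt == Rn && Rn != 31;
}

int main() {
  assert(isUnpredictableWriteback(0, 3, 3));    // "str x3, [x3], #4": flagged
  assert(!isUnpredictableWriteback(0, 31, 31)); // "str xzr, [sp], #4": fine
  return 0;
}
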
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+ createAArch64Disassembler);
+}
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..d4bd163dad60
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+ AArch64Disassembler.cpp
+ )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..a93e343886d0
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Disassembler
+parent = AArch64
+required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
new file mode 100644
index 000000000000..5c861207f836
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Disassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
new file mode 100644
index 000000000000..82ce80c8b1a1
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -0,0 +1,408 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+
+static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
+ assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
+ if (Value & (1ULL << (BitWidth - 1)))
+ return static_cast<int64_t>(Value) - (1LL << BitWidth);
+ else
+ return Value;
+}
+
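A quick spot check of the two's-complement unpacking above, as a standalone sketch that repeats the same logic:

#include <cassert>
#include <cstdint>

// Same logic as unpackSignedImm: reinterpret a BitWidth-bit field as a
// signed two's-complement value.
static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
  if (Value & (1ULL << (BitWidth - 1)))
    return static_cast<int64_t>(Value) - (1LL << BitWidth);
  return Value;
}

int main() {
  assert(unpackSignedImm(9, 0x0FF) == 255);  // top bit clear: positive
  assert(unpackSignedImm(9, 0x1FF) == -1);   // all ones: -1
  assert(unpackSignedImm(9, 0x100) == -256); // sign bit alone: minimum
  return 0;
}
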
+AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void
+AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(9, MOImm.getImm());
+
+ O << '#' << Imm;
+}
+
+void
+AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize) {
+ unsigned ExtImm = MI->getOperand(OpNum).getImm();
+ unsigned OptionHi = ExtImm >> 1;
+ unsigned S = ExtImm & 1;
+ bool IsLSL = OptionHi == 1 && RmSize == 64;
+
+ const char *Ext;
+ switch (OptionHi) {
+ case 1:
+ Ext = (RmSize == 32) ? "uxtw" : "lsl";
+ break;
+ case 3:
+ Ext = (RmSize == 32) ? "sxtw" : "sxtx";
+ break;
+ default:
+ llvm_unreachable("Incorrect Option on load/store (reg offset)");
+ }
+ O << Ext;
+
+ if (S) {
+ unsigned ShiftAmt = Log2_32(MemSize);
+ O << " #" << ShiftAmt;
+ } else if (IsLSL) {
+ O << " #0";
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &Imm12Op = MI->getOperand(OpNum);
+
+ if (Imm12Op.isImm()) {
+ int64_t Imm12 = Imm12Op.getImm();
+ assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
+ O << "#" << Imm12;
+ } else {
+ assert(Imm12Op.isExpr() && "Unexpected shift operand type");
+ O << "#" << *Imm12Op.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+
+ printAddSubImmLSL0Operand(MI, OpNum, O);
+
+ O << ", lsl #12";
+}
+
+void
+AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << MO.getImm();
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmROp = MI->getOperand(OpNum);
+ unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+
+ O << '#' << LSB;
+}
+
+void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ unsigned Width = ImmSOp.getImm() + 1;
+
+ O << '#' << Width;
+}
+
+void
+AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+
+ unsigned ImmR = ImmROp.getImm();
+ unsigned ImmS = ImmSOp.getImm();
+
+ assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
+
+ O << '#' << (ImmS - ImmR + 1);
+}
+
+void
+AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &CRx = MI->getOperand(OpNum);
+
+ O << 'c' << CRx.getImm();
+}
+
+void
+AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ScaleOp = MI->getOperand(OpNum);
+
+ O << '#' << (64 - ScaleOp.getImm());
+}
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ const MCOperand &MOImm8 = MI->getOperand(OpNum);
+
+ assert(MOImm8.isImm()
+ && "Immediate operand required for floating-point immediate inst");
+
+ uint32_t Imm8 = MOImm8.getImm();
+ uint32_t Fraction = Imm8 & 0xf;
+ uint32_t Exponent = (Imm8 >> 4) & 0x7;
+ uint32_t Negative = (Imm8 >> 7) & 0x1;
+
+ float Val = 1.0f + Fraction / 16.0f;
+
+ // That is:
+ // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
+ // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
+ if (Exponent & 0x4) {
+ Val /= 1 << (7 - Exponent);
+ } else {
+ Val *= 1 << (Exponent + 1);
+ }
+
+ Val = Negative ? -Val : Val;
+
+ o << '#' << format("%.8f", Val);
+}
+
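The 8-bit floating-point immediate decoded above uses the usual AArch64 packing of sign, 3-bit exponent and 4-bit fraction; a standalone sketch of the same expansion, with a few exact-power spot checks:

#include <cassert>
#include <cstdint>

// Mirrors printFPImmOperand: value = (-1)^sign * (1 + fraction/16) * 2^e,
// where the 3-bit exponent maps 000..011 to 2^1..2^4 and 100..111 to
// 2^-3..2^0.
static float expandFPImm8(uint32_t Imm8) {
  uint32_t Fraction = Imm8 & 0xf;
  uint32_t Exponent = (Imm8 >> 4) & 0x7;
  uint32_t Negative = (Imm8 >> 7) & 0x1;
  float Val = 1.0f + Fraction / 16.0f;
  if (Exponent & 0x4)
    Val /= 1 << (7 - Exponent);
  else
    Val *= 1 << (Exponent + 1);
  return Negative ? -Val : Val;
}

int main() {
  assert(expandFPImm8(0x00) == 2.0f);  // 1.0 * 2^1
  assert(expandFPImm8(0x70) == 1.0f);  // exponent 111 selects 2^0
  assert(expandFPImm8(0xf0) == -1.0f); // same, with the sign bit set
  return 0;
}
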
+void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ o << "#0.0";
+}
+
+void
+AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+}
+
+template <unsigned field_width, unsigned scale> void
+AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (!MO.isImm()) {
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ // The immediate of an LDR (lit) instruction is a signed 19-bit value, which
+ // is multiplied by 4 (because all A64 instructions are 32 bits wide).
+ uint64_t UImm = MO.getImm();
+ uint64_t Sign = UImm & (1LL << (field_width - 1));
+ int64_t SImm = scale * ((UImm & ~Sign) - Sign);
+
+ O << "#" << SImm;
+}
+
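The (UImm & ~Sign) - Sign idiom above sign-extends the field without branching; with the LDR (lit) parameters from the comment (19-bit field, scale 4), an all-ones field comes out as a -4 byte offset. A standalone sketch using explicit signed arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned field_width = 19;
  const int64_t scale = 4;

  uint64_t UImm = 0x7FFFF; // all-ones 19-bit field
  uint64_t Sign = UImm & (1ULL << (field_width - 1));
  // Clearing the sign bit and subtracting its weight recovers -1, which the
  // scale turns into a -4 byte offset.
  int64_t SImm = scale * ((int64_t)(UImm & ~Sign) - (int64_t)Sign);
  assert(SImm == -4);
  return 0;
}
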
+template<unsigned RegWidth> void
+AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint64_t Val;
+ A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ O << "#0x";
+ O.write_hex(Val);
+}
+
+void
+AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, int MemSize) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+
+ if (MOImm.isImm()) {
+ uint32_t Imm = MOImm.getImm() * MemSize;
+
+ O << "#" << Imm;
+ } else {
+ O << "#" << *MOImm.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Shift) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ // LSL #0 is not printed
+ if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
+ return;
+
+ switch (Shift) {
+ case A64SE::LSL: O << "lsl"; break;
+ case A64SE::LSR: O << "lsr"; break;
+ case A64SE::ASR: O << "asr"; break;
+ case A64SE::ROR: O << "ror"; break;
+ default: llvm_unreachable("Invalid shift specifier in logical instruction");
+ }
+
+ O << " #" << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &UImm16MO = MI->getOperand(OpNum);
+ const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+
+ if (UImm16MO.isImm()) {
+ O << '#' << UImm16MO.getImm();
+
+ if (ShiftMO.getImm() != 0)
+ O << ", lsl #" << (ShiftMO.getImm() * 16);
+
+ return;
+ }
+
+ O << "#" << *UImm16MO.getExpr();
+}
+
+void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ bool ValidName;
+ const MCOperand &MO = MI->getOperand(OpNum);
+ StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+
+ if (ValidName)
+ O << Name;
+ else
+ O << '#' << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ bool ValidName;
+ std::string Name = Mapper.toString(MO.getImm(), ValidName);
+ if (ValidName) {
+ O << Name;
+ return;
+ }
+}
+
+void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Ext) {
+ // FIXME: In principle TableGen should be able to detect this itself far more
+ // easily. We will only accumulate more of these hacks.
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+
+ if (isStackReg(Reg0) || isStackReg(Reg1)) {
+ A64SE::ShiftExtSpecifiers LSLEquiv;
+
+ if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
+ LSLEquiv = A64SE::UXTX;
+ else
+ LSLEquiv = A64SE::UXTW;
+
+ if (Ext == LSLEquiv) {
+ O << "lsl #" << MI->getOperand(OpNum).getImm();
+ return;
+ }
+ }
+
+ switch (Ext) {
+ case A64SE::UXTB: O << "uxtb"; break;
+ case A64SE::UXTH: O << "uxth"; break;
+ case A64SE::UXTW: O << "uxtw"; break;
+ case A64SE::UXTX: O << "uxtx"; break;
+ case A64SE::SXTB: O << "sxtb"; break;
+ case A64SE::SXTH: O << "sxth"; break;
+ case A64SE::SXTW: O << "sxtw"; break;
+ case A64SE::SXTX: O << "sxtx"; break;
+ default: llvm_unreachable("Unexpected shift type for printing");
+ }
+
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getImm() != 0)
+ O << " #" << MO.getImm();
+}
+
+template<int MemScale> void
+AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+
+ O << "#" << (Imm * MemScale);
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ if (MI->getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a special assembler directive which applies an
+ // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
+ // form outside the normal TableGenerated scheme.
+ O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
+ } else if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
new file mode 100644
index 000000000000..639fa869c016
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -0,0 +1,172 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64INSTPRINTER_H
+#define LLVM_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+ AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printRegName(raw_ostream &O, unsigned RegNum) const;
+
+ template<unsigned MemSize, unsigned RmSize>
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ }
+
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize);
+
+ void printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+ void printAddSubImmLSL12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+
+ void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ template<int MemScale>
+ void printOffsetUImm12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &o) {
+ printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+ }
+
+ void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o, int MemScale);
+
+ template<unsigned field_width, unsigned scale>
+ void printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<typename SomeNamedImmMapper>
+ void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ }
+
+ void printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printMRSOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ }
+
+ void printMSROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ }
+
+ void printShiftOperand(const char *name, const MCInst *MI,
+ unsigned OpIdx, raw_ostream &O);
+
+ void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("lsr", MI, OpNum, O);
+ }
+ void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("asr", MI, OpNum, O);
+ }
+ void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("ror", MI, OpNum, O);
+ }
+
+ template<A64SE::ShiftExtSpecifiers Shift>
+ void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand(MI, OpNum, O, Shift);
+ }
+
+ void printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+
+ void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<int MemSize> void
+ printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<A64SE::ShiftExtSpecifiers EXT>
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printRegExtendOperand(MI, OpNum, O, EXT);
+ }
+
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ bool isStackReg(unsigned RegNo) {
+ return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ }
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..d4b980a94d9b
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmPrinter
+ AArch64InstPrinter.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
+
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..4836c7c45d44
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmPrinter
+parent = AArch64
+required_libraries = AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
new file mode 100644
index 000000000000..1c36a8dea798
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmPrinter
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
new file mode 100644
index 000000000000..3b296fdddc04
--- /dev/null
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -0,0 +1,36 @@
+;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
+[component_0]
+type = TargetGroup
+name = AArch64
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+;has_jit = 1
+
+[component_1]
+type = Library
+name = AArch64CodeGen
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
new file mode 100644
index 000000000000..a3373b1087bb
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -0,0 +1,585 @@
+//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCAsmBackend class,
+// which is principally concerned with relaxation of the various fixup kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+public:
+ AArch64AsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(),
+ STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
+ {}
+
+ ~AArch64AsmBackend() {
+ delete STI;
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+};
+} // end anonymous namespace
+
+void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
+ IsResolved = false;
+}
+
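The placement-dependence described in the comment above can be made concrete with a small standalone sketch (addresses are hypothetical):

#include <cassert>
#include <cstdint>

// ADRP materialises Page(target) - Page(pc), where Page(x) = x & ~0xfff.
static int64_t adrpPageDelta(uint64_t PC, uint64_t Target) {
  return (int64_t)(Target & ~0xfffULL) - (int64_t)(PC & ~0xfffULL);
}

int main() {
  // An ADRP at 0xffc reaching a symbol at 0x1000 needs a full page of
  // offset...
  assert(adrpPageDelta(0xffc, 0x1000) == 0x1000);
  // ...but from 0x1000 the symbol is in the same page and the offset is 0.
  // This is why the page-computing fixups above are left to the linker.
  assert(adrpPageDelta(0x1000, 0x1000) == 0);
  return 0;
}
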
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFAArch64AsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ unsigned getNumFixupKinds() const {
+ return AArch64::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// AArch64FixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_add_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst128_lo12", 0, 32, 0 },
+{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_uabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g3", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g2", 0, 32, 0 },
+{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_tprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_tprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ };
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+ }
+
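The byte-wise OR in applyFixup merges the encoded fixup bits into an instruction word already present in the fragment, little-endian. A standalone sketch with synthetic values:

#include <cassert>
#include <cstdint>

int main() {
  // A 4-byte little-endian instruction word (the A64 NOP encoding, purely
  // as filler) and a synthetic fixup value to merge in.
  unsigned char Data[4] = {0x1f, 0x20, 0x03, 0xd5};
  uint64_t Value = 0x00a80000; // bits destined for byte 2 (bits 23:16)
  for (unsigned i = 0; i != 4; ++i)
    Data[i] |= (unsigned char)((Value >> (i * 8)) & 0xff);
  assert(Data[2] == (0x03 | 0xa8)); // byte 2 picked up the merged bits
  return 0;
}
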
+ bool mayNeedRelaxation(const MCInst&) const {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+ llvm_unreachable("Cannot relax instructions");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAArch64ELFObjectWriter(OS, OSABI);
+ }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Correct for now. Since every instruction is a fixed 32 bits wide, only
+ // very low-level considerations could lead to selecting an encoding that
+ // later fails to fit.
+ return false;
+}
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // Can't emit NOP data whose size isn't a multiple of 32 bits.
+ if (Count % 4 != 0)
+ return false;
+
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xd503201f);
+
+ return true;
+}
+
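A condensed sketch of the padding policy above: only whole 32-bit NOPs (0xd503201f) are emitted, and byte counts that are not a multiple of 4 are refused:

#include <cassert>
#include <cstdint>
#include <vector>

// Same policy as writeNopData, writing into a vector instead of an
// MCObjectWriter.
static bool writeNops(uint64_t Count, std::vector<uint32_t> &Out) {
  if (Count % 4 != 0)
    return false;
  for (uint64_t i = 0; i != Count / 4; ++i)
    Out.push_back(0xd503201f); // A64 NOP
  return true;
}

int main() {
  std::vector<uint32_t> Buf;
  assert(!writeNops(10, Buf)); // not a multiple of 4: refused
  assert(writeNops(16, Buf) && Buf.size() == 4);
  return 0;
}
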
+static unsigned ADRImmBits(unsigned Value) {
+ unsigned lo2 = Value & 0x3;
+ unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+ return (hi19 << 5) | (lo2 << 29);
+}
+
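ADR-family instructions split their 21-bit immediate into immlo (bits 30:29) and immhi (bits 23:5), and ADRImmBits returns both fields already shifted into place. A standalone round-trip check, repeating the function for the test:

#include <cassert>
#include <cstdint>

static unsigned ADRImmBits(unsigned Value) {
  unsigned lo2 = Value & 0x3;
  unsigned hi19 = (Value & 0x1fffff) >> 2;
  return (hi19 << 5) | (lo2 << 29);
}

int main() {
  unsigned Imm = 0x12345; // arbitrary 21-bit immediate
  unsigned Bits = ADRImmBits(Imm);
  unsigned lo2 = (Bits >> 29) & 0x3;
  unsigned hi19 = (Bits >> 5) & 0x7ffff;
  assert(((hi19 << 2) | lo2) == Imm); // the two fields reassemble the value
  return 0;
}
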
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_2:
+ assert((int64_t)Value >= -32768 &&
+ (int64_t)Value <= 65535 &&
+ "Out of range ABS16 fixup");
+ return Value;
+ case FK_Data_4:
+ assert((int64_t)Value >= -(1LL << 31) &&
+ (int64_t)Value <= (1LL << 32) - 1 &&
+ "Out of range ABS32 fixup");
+ return Value;
+ case FK_Data_8:
+ return Value;
+
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
+ // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+ case AArch64::fixup_a64_ld_prel:
+ // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+ // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+ return (Value & 0x1ffffc) << 3;
+
+ case AArch64::fixup_a64_adr_prel:
+ // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+ // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+ return ADRImmBits(Value & 0x1fffff);
+
+ case AArch64::fixup_a64_adr_prel_page:
+ // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+ // F000 of the result of the operation, checking that -2^32 <= result <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+ case AArch64::fixup_a64_add_tprel_hi12:
+ // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+ // R_AARCH64_TLSLE_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFF
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFF
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFE
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFE
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9;
+
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFC
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FFC
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_lo12:
+ // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+ // of S+A, with no overflow check.
+ return (Value & 0xffc) << 8;
+
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FF8
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: Set an LD/ST offset field to bits
+ // FF8
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_lo12:
+ // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+ // of S+A, with no overflow check.
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_ldst128_lo12:
+ // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+ // of S+A, with no overflow check.
+ return (Value & 0xff0) << 6;
+
+ case AArch64::fixup_a64_movw_uabs_g0:
+ // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+ // with a check that S+A < 2^16
+ assert(Value <= 0xffff && "Out of range move wide fixup");
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+ // S+A with no overflow check.
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g1:
+ // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+ // S+A with a check that S+A < 2^32
+ assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+ // FFFF0000 of S+A with no overflow check.
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 of S+A with a check that S+A < 2^48
+ assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000
+ // 0000 of S+A with no overflow check.
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g3:
+ // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 0000 of S+A (no overflow check needed)
+ return ((Value >> 48) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+ // to bits FFFF of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g0:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+ // bits FFFF of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g0: {
+ // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+ // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = (Value & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ // MCCodeEmitter should have encoded a MOVN, which is fine.
+ Value = (~Value & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+ case AArch64::fixup_a64_movw_tprel_g1:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+ // bits FFFF0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g1: {
+ // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 16) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 16) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
+ // to bits FFFF 0000 0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g2:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
+ // bits FFFF 0000 0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g2: {
+ // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
+ // we should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 32) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 32) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_tstbr:
+ // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
+ // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
+ assert((int64_t)Value >= -(1LL << 15) &&
+ (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
+ return (Value & 0xfffc) << (5 - 2);
+
+ case AArch64::fixup_a64_condbr:
+ // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
+ // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
+ return (Value & 0x1ffffc) << (5 - 2);
+
+ case AArch64::fixup_a64_uncondbr:
+ // R_AARCH64_JUMP26: same encoding as the call fixup below (though a
+ // linker may treat the two differently).
+ case AArch64::fixup_a64_call:
+ // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+ // checking that -2^27 <= S+A-P < 2^27.
+ assert((int64_t)Value >= -(1LL << 27) &&
+ (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+ return (Value & 0xffffffc) >> 2;
+
+ case AArch64::fixup_a64_adr_gottprel_page:
+ // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+ // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+ // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_adr_prel_got_page:
+ // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+ // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+ // of X, with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+ // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+ // G(S) with no overflow check. Check X & 7 == 0
+ assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_tlsdesc_call:
+ // R_AARCH64_TLSDESC_CALL: For relaxation only.
+ return 0;
+ }
+}
+
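The signed move-wide cases above switch between MOVN and MOVZ by toggling bit 30 of the encoding; the underlying invariant is that a MOVN materialises ~(imm16 << shift), so a negative value is encoded by complementing its low bits. A standalone sketch of the shift-0 (g0) case:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Signed = -2;
  // Negative value: keep the MOVN encoding and store the complement of the
  // low 16 bits in the immediate field, as fixup_a64_movw_sabs_g0 does.
  uint64_t Imm16 = ~(uint64_t)Signed & 0xffff;
  assert(Imm16 == 1);
  // MOVN semantics at shift 0: result = ~imm16, recovering the value.
  assert((int64_t)~Imm16 == -2);
  return 0;
}
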
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFAArch64AsmBackend(T, TT, OSABI);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 000000000000..4bcc65dfca27
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,292 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file handles ELF-specific object emission, converting LLVM's internal
+// fixups into the appropriate relocations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AArch64ELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AArch64ELFObjectWriter();
+
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+private:
+};
+}
+
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true)
+{}
+
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
+{}
+
+unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_PREL16;
+ case AArch64::fixup_a64_ld_prel:
+ Type = ELF::R_AARCH64_LD_PREL_LO19;
+ break;
+ case AArch64::fixup_a64_adr_prel:
+ Type = ELF::R_AARCH64_ADR_PREL_LO21;
+ break;
+ case AArch64::fixup_a64_adr_prel_page:
+ Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ break;
+ case AArch64::fixup_a64_adr_prel_got_page:
+ Type = ELF::R_AARCH64_ADR_GOT_PAGE;
+ break;
+ case AArch64::fixup_a64_tstbr:
+ Type = ELF::R_AARCH64_TSTBR14;
+ break;
+ case AArch64::fixup_a64_condbr:
+ Type = ELF::R_AARCH64_CONDBR19;
+ break;
+ case AArch64::fixup_a64_uncondbr:
+ Type = ELF::R_AARCH64_JUMP26;
+ break;
+ case AArch64::fixup_a64_call:
+ Type = ELF::R_AARCH64_CALL26;
+ break;
+ case AArch64::fixup_a64_adr_gottprel_page:
+ Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ break;
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ break;
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_ABS16;
+ case AArch64::fixup_a64_add_lo12:
+ Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_lo12:
+ Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_lo12:
+ Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_lo12:
+ Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_lo12:
+ Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst128_lo12:
+ Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g3:
+ Type = ELF::R_AARCH64_MOVW_UABS_G3;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g0:
+ Type = ELF::R_AARCH64_MOVW_SABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g1:
+ Type = ELF::R_AARCH64_MOVW_SABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g2:
+ Type = ELF::R_AARCH64_MOVW_SABS_G2;
+ break;
+
+ // TLS Local-dynamic block
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ break;
+
+ // TLS initial-exec block
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ break;
+
+ // TLS local-exec block
+ case AArch64::fixup_a64_movw_tprel_g2:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_tprel_hi12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ break;
+
+ // TLS general-dynamic block
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_call:
+ Type = ELF::R_AARCH64_TLSDESC_CALL;
+ break;
+ }
+ }
+
+ return Type;
+}
+
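+// For example (illustrative): a MOVZ whose operand carried
+// fixup_a64_movw_sabs_g0 is emitted into the object file with an
+// R_AARCH64_MOVW_SABS_G0 relocation, as selected by the switch above.
+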
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
new file mode 100644
index 000000000000..b83577af45c6
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -0,0 +1,160 @@
+//===- AArch64ELFStreamer.cpp - ELF Object Output for AArch64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits AArch64 ELF .o object files. It
+// differs from the generic ELF streamer in that it emits mapping symbols
+// ($x and $d) to delimit regions of code and data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// AArch64 ELF ABI:
+/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
+///
+/// In brief: $x or $d should be emitted at the start of each contiguous region
+/// of A64 code or data in a section. In practice, this emission does not rely
+/// on explicit assembler directives but on inherent properties of the
+/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" is
+/// an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
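+///
+/// For example (illustrative assembly for a single .text section):
+///     add x0, x1, x2    // streamer emits "$x.0" before this instruction
+///     .word 42          // streamer emits "$d.1" before this data
+///     ret               // streamer emits "$x.2": back to code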
+class AArch64ELFStreamer : public MCELFStreamer {
+public:
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~AArch64ELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+    // We have to keep track of the mapping symbol state of any sections we
+    // use. Each one should start off as EMS_None, which is what
+    // DenseMap::lookup returns for sections it hasn't seen yet (a
+    // value-initialized ElfMappingSymbol).
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ EmitA64MappingSymbol();
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_A64,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitA64MappingSymbol() {
+ if (LastEMS == EMS_A64) return;
+ EmitMappingSymbol("$x");
+ LastEMS = EMS_A64;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
new file mode 100644
index 000000000000..5a89ca50cee8
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_ELF_STREAMER_H
+#define LLVM_AARCH64_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif // LLVM_AARCH64_ELF_STREAMER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
new file mode 100644
index 000000000000..eeb122d38494
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -0,0 +1,113 @@
+//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LLVM fixups applied to MCInsts in the AArch64
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+ namespace AArch64 {
+ enum Fixups {
+ fixup_a64_ld_prel = FirstTargetFixupKind,
+ fixup_a64_adr_prel,
+ fixup_a64_adr_prel_page,
+
+ fixup_a64_add_lo12,
+
+ fixup_a64_ldst8_lo12,
+ fixup_a64_ldst16_lo12,
+ fixup_a64_ldst32_lo12,
+ fixup_a64_ldst64_lo12,
+ fixup_a64_ldst128_lo12,
+
+ fixup_a64_tstbr,
+ fixup_a64_condbr,
+ fixup_a64_uncondbr,
+ fixup_a64_call,
+
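+      // For example (illustrative): "b target" attaches fixup_a64_uncondbr
+      // and "bl target" fixup_a64_call; an ELF object writer lowers these to
+      // R_AARCH64_JUMP26 and R_AARCH64_CALL26 respectively.
+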
+ fixup_a64_movw_uabs_g0,
+ fixup_a64_movw_uabs_g0_nc,
+ fixup_a64_movw_uabs_g1,
+ fixup_a64_movw_uabs_g1_nc,
+ fixup_a64_movw_uabs_g2,
+ fixup_a64_movw_uabs_g2_nc,
+ fixup_a64_movw_uabs_g3,
+
+ fixup_a64_movw_sabs_g0,
+ fixup_a64_movw_sabs_g1,
+ fixup_a64_movw_sabs_g2,
+
+ fixup_a64_adr_prel_got_page,
+ fixup_a64_ld64_got_lo12_nc,
+
+ // Produce offsets relative to the module's dynamic TLS area.
+ fixup_a64_movw_dtprel_g2,
+ fixup_a64_movw_dtprel_g1,
+ fixup_a64_movw_dtprel_g1_nc,
+ fixup_a64_movw_dtprel_g0,
+ fixup_a64_movw_dtprel_g0_nc,
+ fixup_a64_add_dtprel_hi12,
+ fixup_a64_add_dtprel_lo12,
+ fixup_a64_add_dtprel_lo12_nc,
+ fixup_a64_ldst8_dtprel_lo12,
+ fixup_a64_ldst8_dtprel_lo12_nc,
+ fixup_a64_ldst16_dtprel_lo12,
+ fixup_a64_ldst16_dtprel_lo12_nc,
+ fixup_a64_ldst32_dtprel_lo12,
+ fixup_a64_ldst32_dtprel_lo12_nc,
+ fixup_a64_ldst64_dtprel_lo12,
+ fixup_a64_ldst64_dtprel_lo12_nc,
+
+      // Produce the GOT entry containing a variable's tp-relative offset in
+      // TLS's initial-exec mode.
+ fixup_a64_movw_gottprel_g1,
+ fixup_a64_movw_gottprel_g0_nc,
+ fixup_a64_adr_gottprel_page,
+ fixup_a64_ld64_gottprel_lo12_nc,
+ fixup_a64_ld_gottprel_prel19,
+
+ // Produce offsets relative to the thread pointer: TPIDR_EL0.
+ fixup_a64_movw_tprel_g2,
+ fixup_a64_movw_tprel_g1,
+ fixup_a64_movw_tprel_g1_nc,
+ fixup_a64_movw_tprel_g0,
+ fixup_a64_movw_tprel_g0_nc,
+ fixup_a64_add_tprel_hi12,
+ fixup_a64_add_tprel_lo12,
+ fixup_a64_add_tprel_lo12_nc,
+ fixup_a64_ldst8_tprel_lo12,
+ fixup_a64_ldst8_tprel_lo12_nc,
+ fixup_a64_ldst16_tprel_lo12,
+ fixup_a64_ldst16_tprel_lo12_nc,
+ fixup_a64_ldst32_tprel_lo12,
+ fixup_a64_ldst32_tprel_lo12_nc,
+ fixup_a64_ldst64_tprel_lo12,
+ fixup_a64_ldst64_tprel_lo12_nc,
+
+ // Produce the special fixups used by the general-dynamic TLS model.
+ fixup_a64_tlsdesc_adr_page,
+ fixup_a64_tlsdesc_ld64_lo12_nc,
+ fixup_a64_tlsdesc_add_lo12_nc,
+ fixup_a64_tlsdesc_call,
+
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
new file mode 100644
index 000000000000..8ec8cbf1c525
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AArch64MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCAsmInfo.h"
+
+using namespace llvm;
+
+AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
+ PointerSize = 8;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "//";
+ PrivateGlobalPrefix = ".L";
+ Code32Directive = ".code\t32";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ UseDataRegionDirectives = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
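+
+// Illustrative effect of the directives above: emitting a 32-bit constant in
+// textual assembly output produces "\t.word\t42" rather than the generic
+// MCAsmInfo default "\t.long\t42".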
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index f786147b9267..a20bc471c20d 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,24 +7,21 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the SPUMCAsmInfo class.
+// This file contains the declaration of the AArch64MCAsmInfo class.
//
//===----------------------------------------------------------------------===//
-#ifndef SPUTARGETASMINFO_H
-#define SPUTARGETASMINFO_H
+#ifndef LLVM_AARCH64TARGETASMINFO_H
+#define LLVM_AARCH64TARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
- class Target;
-
- class SPULinuxMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
- public:
- explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
+
+ struct AArch64ELFMCAsmInfo : public MCAsmInfo {
+ explicit AArch64ELFMCAsmInfo();
};
+
} // namespace llvm
-#endif /* SPUTARGETASMINFO_H */
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
new file mode 100644
index 000000000000..a5c591eee800
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -0,0 +1,502 @@
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64MCCodeEmitter : public MCCodeEmitter {
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ MCContext &Ctx;
+
+public:
+ AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+
+ ~AArch64MCCodeEmitter() {}
+
+ unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int MemSize>
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
+ }
+
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const;
+
+ unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // Labels are handled mostly the same way: a symbol is needed, and
+ // just gets some fixup attached.
+ template<AArch64::Fixups fixupDesired>
+ unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ EmitByte(Val & 0xff, OS);
+ Val >>= 8;
+ }
+ }
+
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;
+
+
+};
+
+} // end anonymous namespace
+
+unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (!MO.isExpr()) {
+ // This can occur for manually decoded or constructed MCInsts, but neither
+ // the assembly-parser nor instruction selection will currently produce an
+ // MCInst that's not a symbol reference.
+ assert(MO.isImm() && "Unexpected address requested");
+ return MO.getImm();
+ }
+
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ return 0;
+}
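+
+// Illustrative use of getAddressWithFixup: for "ldr x0, some_label" the label
+// operand reaches here as an MCExpr, so a fixup such as
+//     MCFixup::Create(0, SymRefExpr, MCFixupKind(AArch64::fixup_a64_ld_prel))
+// is recorded and the 19-bit immediate field is encoded as zero, to be
+// patched later by the assembler backend or a relocation.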
+
+unsigned AArch64MCCodeEmitter::
+getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const {
+ const MCOperand &ImmOp = MI.getOperand(OpIdx);
+ if (ImmOp.isImm())
+ return ImmOp.getImm();
+
+ assert(ImmOp.isExpr() && "Unexpected operand type");
+ const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
+ unsigned FixupKind;
+
+
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected operand modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
+ AArch64::fixup_a64_ldst16_lo12,
+ AArch64::fixup_a64_ldst32_lo12,
+ AArch64::fixup_a64_ldst64_lo12,
+ AArch64::fixup_a64_ldst128_lo12 };
+ assert(MemSize <= 16 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOT_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
+ AArch64::fixup_a64_ldst16_dtprel_lo12,
+ AArch64::fixup_a64_ldst32_dtprel_lo12,
+ AArch64::fixup_a64_ldst64_dtprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
+ break;
+  case AArch64MCExpr::VK_AARCH64_TPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
+ AArch64::fixup_a64_ldst16_tprel_lo12,
+ AArch64::fixup_a64_ldst32_tprel_lo12,
+ AArch64::fixup_a64_ldst64_tprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_tprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
+ break;
+ }
+
+ return getAddressWithFixup(ImmOp, FixupKind, Fixups);
+}
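+
+// Worked example (illustrative): for "ldr x0, [x1, #:lo12:var]" MemSize is 8,
+// so Log2_32(8) = 3 selects AArch64::fixup_a64_ldst64_lo12 from the
+// VK_AARCH64_LO12 table above.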
+
+unsigned
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind = 0;
+ switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
+ default: llvm_unreachable("Invalid expression modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12:
+ FixupKind = AArch64::fixup_a64_add_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
+ if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
+ Modifier = Expr->getKind();
+
+ unsigned FixupKind = 0;
+  switch (Modifier) {
+ case AArch64MCExpr::VK_AARCH64_None:
+ FixupKind = AArch64::fixup_a64_adr_prel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOT:
+ FixupKind = AArch64::fixup_a64_adr_prel_got_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL:
+ FixupKind = AArch64::fixup_a64_adr_gottprel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC:
+ FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
+ break;
+ default:
+ llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+}
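+
+// Worked example for the LSL encodings above (illustrative): "lsl x0, x1, #4"
+// is an alias of "ubfm x0, x1, #60, #59", i.e. immr = (64 - 4) & 0x3f = 60
+// and imms = 63 - 4 = 59, packed here as immr | (imms << 6).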
+
+
+template<AArch64::Fixups fixupDesired> unsigned
+AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isExpr())
+ return getAddressWithFixup(MO, fixupDesired, Fixups);
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned
+AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind;
+ if (isa<AArch64MCExpr>(MO.getExpr())) {
+ assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind()
+ == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ && "Invalid symbol modifier for literal load");
+ FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
+ } else {
+ FixupKind = AArch64::fixup_a64_ld_prel;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+unsigned
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &UImm16MO = MI.getOperand(OpIdx);
+ const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+
+ unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
+
+ if (UImm16MO.isImm()) {
+ Result |= UImm16MO.getImm();
+ return Result;
+ }
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ AArch64::Fixups requestedFixup;
+ switch (A64E->getKind()) {
+ default: llvm_unreachable("unexpected expression modifier");
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
+ }
+
+ return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
+}
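+
+// Worked example (illustrative): "movz x0, #:abs_g1:sym" has hw = 1 (LSL #16)
+// as its shift operand, contributing 1 << 16 to Result; the symbolic operand
+// records fixup_a64_movw_uabs_g1 and leaves the imm16 field zero for now.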
+
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
+
+ return EncodedValue;
+}
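+
+// For example (illustrative): LDXR has neither an Rs nor an Rt2 operand, so
+// fixLoadStoreExclusive<0, 0> forces the Rs field (bits 20-16, 0x001F0000)
+// and the Rt2 field (bits 14-10, 0x00007C00) to all-ones as required.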
+
+unsigned
+AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
+ // If one of the signed fixup kinds is applied to a MOVZ instruction, the
+ // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
+ // job to ensure that any bits possibly affected by this are 0. This means we
+ // must zero out bit 30 (essentially emitting a MOVN).
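+  // For instance (illustrative), "movz x0, #:dtprel_g0:var" may have to
+  // materialise a negative offset, which is done by rewriting the
+  // instruction as a MOVN; clearing bit 30 here keeps that rewrite valid.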
+ MCOperand UImm16MO = MI.getOperand(1);
+
+ // Nothing to do if there's no fixup.
+ if (UImm16MO.isImm())
+ return EncodedValue;
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
+
+ llvm_unreachable("Should have returned by now");
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
+ unsigned EncodedValue) const {
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ EncodedValue |= 0x1f << 10;
+ return EncodedValue;
+}
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(Ctx);
+}
+
+void AArch64MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
+ const MCExpr *Expr;
+ Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
+ Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
+ return;
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ EmitInstruction(Binary, OS);
+}
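+
+// Note on TLSDESCCALL above (illustrative): in a general-dynamic sequence
+// such as
+//     .tlsdesccall var
+//     blr x1
+// the pseudo emits no bytes; it only attaches fixup_a64_tlsdesc_call (and
+// hence R_AARCH64_TLSDESC_CALL) to the address of the following BLR.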
+
+
+#include "AArch64GenMCCodeEmitter.inc"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
new file mode 100644
index 000000000000..c1abfe74dfdd
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -0,0 +1,178 @@
+//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the assembly expression modifiers
+// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64mcexpr"
+#include "AArch64MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/Object/ELF.h"
+
+using namespace llvm;
+
+const AArch64MCExpr*
+AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Kind, Expr);
+}
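+
+// Typical construction (illustrative): wrapping a symbol reference so that it
+// prints as ":lo12:var" and later lowers to a LO12 fixup:
+//     const MCExpr *Sym = MCSymbolRefExpr::Create(Var, Ctx);
+//     const AArch64MCExpr *E = AArch64MCExpr::CreateLo12(Sym, Ctx);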
+
+void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_AARCH64_GOT: OS << ":got:"; break;
+ case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
+ case VK_AARCH64_LO12: OS << ":lo12:"; break;
+ case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
+ case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
+ case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
+ case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
+ case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
+ case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
+ case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
+ case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
+ case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
+ case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
+ case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
+ case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
+ case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
+ case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
+ case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
+ case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
+ case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
+ case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
+ case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
+ case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
+ case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
+ case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
+ case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
+ case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
+ case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
+ case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
+ case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
+ case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
+ case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
+ case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
+ case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
+ case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+  }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_AARCH64_DTPREL_G2:
+ case VK_AARCH64_DTPREL_G1:
+ case VK_AARCH64_DTPREL_G1_NC:
+ case VK_AARCH64_DTPREL_G0:
+ case VK_AARCH64_DTPREL_G0_NC:
+ case VK_AARCH64_DTPREL_HI12:
+ case VK_AARCH64_DTPREL_LO12:
+ case VK_AARCH64_DTPREL_LO12_NC:
+ case VK_AARCH64_GOTTPREL_G1:
+ case VK_AARCH64_GOTTPREL_G0_NC:
+ case VK_AARCH64_GOTTPREL:
+ case VK_AARCH64_GOTTPREL_LO12:
+ case VK_AARCH64_TPREL_G2:
+ case VK_AARCH64_TPREL_G1:
+ case VK_AARCH64_TPREL_G1_NC:
+ case VK_AARCH64_TPREL_G0:
+ case VK_AARCH64_TPREL_G0_NC:
+ case VK_AARCH64_TPREL_HI12:
+ case VK_AARCH64_TPREL_LO12:
+ case VK_AARCH64_TPREL_LO12_NC:
+ case VK_AARCH64_TLSDESC:
+ case VK_AARCH64_TLSDESC_LO12:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
new file mode 100644
index 000000000000..c0e3b29474d1
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -0,0 +1,167 @@
+//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes AArch64-specific MCExprs, used for modifiers like
+// ":lo12:" or ":gottprel_g1:".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCEXPR_H
+#define LLVM_AARCH64MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class AArch64MCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_AARCH64_None,
+ VK_AARCH64_GOT, // :got: modifier in assembly
+ VK_AARCH64_GOT_LO12, // :got_lo12:
+ VK_AARCH64_LO12, // :lo12:
+
+ VK_AARCH64_ABS_G0, // :abs_g0:
+ VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
+ VK_AARCH64_ABS_G1,
+ VK_AARCH64_ABS_G1_NC,
+ VK_AARCH64_ABS_G2,
+ VK_AARCH64_ABS_G2_NC,
+ VK_AARCH64_ABS_G3,
+
+ VK_AARCH64_SABS_G0, // :abs_g0_s:
+ VK_AARCH64_SABS_G1,
+ VK_AARCH64_SABS_G2,
+
+ VK_AARCH64_DTPREL_G2, // :dtprel_g2:
+ VK_AARCH64_DTPREL_G1,
+ VK_AARCH64_DTPREL_G1_NC,
+ VK_AARCH64_DTPREL_G0,
+ VK_AARCH64_DTPREL_G0_NC,
+ VK_AARCH64_DTPREL_HI12,
+ VK_AARCH64_DTPREL_LO12,
+ VK_AARCH64_DTPREL_LO12_NC,
+
+    VK_AARCH64_GOTTPREL_G1, // :gottprel_g1:
+ VK_AARCH64_GOTTPREL_G0_NC,
+ VK_AARCH64_GOTTPREL,
+ VK_AARCH64_GOTTPREL_LO12,
+
+    VK_AARCH64_TPREL_G2, // :tprel_g2:
+ VK_AARCH64_TPREL_G1,
+ VK_AARCH64_TPREL_G1_NC,
+ VK_AARCH64_TPREL_G0,
+ VK_AARCH64_TPREL_G0_NC,
+ VK_AARCH64_TPREL_HI12,
+ VK_AARCH64_TPREL_LO12,
+ VK_AARCH64_TPREL_LO12_NC,
+
+ VK_AARCH64_TLSDESC, // :tlsdesc:
+ VK_AARCH64_TLSDESC_LO12
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const AArch64MCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
new file mode 100644
index 000000000000..7960db08c8d6
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -0,0 +1,194 @@
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCTargetDesc.h"
+#include "AArch64ELFStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+ return X;
+}
+
+
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAArch64MCRegisterInfo(X, AArch64::X30);
+ return X;
+}
+
+static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
+ // On ELF platforms the default static relocation model has a smart enough
+ // linker to cope with referencing external symbols defined in a shared
+ // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ RM = Reloc::Static;
+ }
+
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+
+static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class AArch64MCInstrAnalysis : public MCInstrAnalysis {
+public:
+ AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
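+  // "b.al label" is encoded as Bcc with the AL condition yet behaves as an
+  // unconditional branch, so both predicates below special-case A64CC::AL.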
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
+ // FIXME: We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
+ != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(LblOperand).getImm();
+
+ return Addr + Imm;
+ }
+};
+
+}
+
+static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AArch64MCInstrAnalysis(Info);
+}
+
+
+
+extern "C" void LLVMInitializeAArch64TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
+ createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
+ createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
+ createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ using AArch64_MC::createAArch64MCSubtargetInfo;
+ TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
+ createAArch64MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
+ createAArch64MCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
+ createAArch64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
+ createAArch64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
+ createAArch64MCInstPrinter);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
new file mode 100644
index 000000000000..3849fe379513
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -0,0 +1,65 @@
+//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCTARGETDESC_H
+#define LLVM_AARCH64MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheAArch64Target;
+
+namespace AArch64_MC {
+ MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+} // End llvm namespace
+
+// Defines symbolic names for AArch64 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AArch64GenRegisterInfo.inc"
+
+// Defines symbolic names for the AArch64 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AArch64GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..44c66a224e30
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMAArch64Desc
+ AArch64AsmBackend.cpp
+ AArch64ELFObjectWriter.cpp
+ AArch64ELFStreamer.cpp
+ AArch64MCAsmInfo.cpp
+ AArch64MCCodeEmitter.cpp
+ AArch64MCExpr.cpp
+ AArch64MCTargetDesc.cpp
+ )
+add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..37c8035a49f9
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Desc
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Info MC Support
+add_to_library_groups = AArch64
diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile
index 10d9a42239ad..5779ac5ac60a 100644
--- a/lib/Target/CellSPU/MCTargetDesc/Makefile
+++ b/lib/Target/AArch64/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUDesc
+LIBRARYNAME = LLVMAArch64Desc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile
new file mode 100644
index 000000000000..641bb83c4775
--- /dev/null
+++ b/lib/Target/AArch64/Makefile
@@ -0,0 +1,30 @@
+##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAArch64CodeGen
+TARGET = AArch64
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AArch64GenAsmMatcher.inc \
+ AArch64GenAsmWriter.inc \
+ AArch64GenCallingConv.inc \
+ AArch64GenDAGISel.inc \
+ AArch64GenDisassemblerTables.inc \
+ AArch64GenInstrInfo.inc \
+ AArch64GenMCCodeEmitter.inc \
+ AArch64GenMCPseudoLowering.inc \
+ AArch64GenRegisterInfo.inc \
+ AArch64GenSubtargetInfo.inc
+
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt
new file mode 100644
index 000000000000..601990f17dee
--- /dev/null
+++ b/lib/Target/AArch64/README.txt
@@ -0,0 +1,2 @@
+This file will list the changes that need to be made before AArch64 can become
+an officially supported target. It is currently a placeholder.
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
new file mode 100644
index 000000000000..b8099cb26b0f
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the key registration step for the architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+ RegisterTarget<Triple::aarch64>
+ X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..e236eed00be1
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Info
+ AArch64TargetInfo.cpp
+ )
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
index 6937e705ff7f..5b003f012218 100644
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,8 @@
[component_0]
type = Library
-name = CellSPUInfo
-parent = CellSPU
+name = AArch64Info
+parent = AArch64
required_libraries = MC Support Target
-add_to_library_groups = CellSPU
+add_to_library_groups = AArch64
diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile
index 9cb6827b4323..9dc9aa4bccf7 100644
--- a/lib/Target/CellSPU/TargetInfo/Makefile
+++ b/lib/Target/AArch64/TargetInfo/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,7 +7,7 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUInfo
+LIBRARYNAME = LLVMAArch64Info
# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
new file mode 100644
index 000000000000..1678559aa084
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -0,0 +1,1103 @@
+//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides basic encoding and assembly information for AArch64.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm;
+
+StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Value == Value) {
+ Valid = true;
+ return Pairs[i].Name;
+ }
+ }
+
+ Valid = false;
+ return StringRef();
+}
+
+uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+ std::string LowerCaseName = Name.lower();
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Name == LowerCaseName) {
+ Valid = true;
+ return Pairs[i].Value;
+ }
+ }
+
+ Valid = false;
+ return -1;
+}
+
+bool NamedImmMapper::validImm(uint32_t Value) const {
+ return Value < TooBigImm;
+}
+
+const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+ {"s1e1r", S1E1R},
+ {"s1e2r", S1E2R},
+ {"s1e3r", S1E3R},
+ {"s1e1w", S1E1W},
+ {"s1e2w", S1E2W},
+ {"s1e3w", S1E3W},
+ {"s1e0r", S1E0R},
+ {"s1e0w", S1E0W},
+ {"s12e1r", S12E1R},
+ {"s12e1w", S12E1W},
+ {"s12e0r", S12E0R},
+ {"s12e0w", S12E0W},
+};
+
+A64AT::ATMapper::ATMapper()
+ : NamedImmMapper(ATPairs, 0) {}
+
+const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+ {"oshld", OSHLD},
+ {"oshst", OSHST},
+ {"osh", OSH},
+ {"nshld", NSHLD},
+ {"nshst", NSHST},
+ {"nsh", NSH},
+ {"ishld", ISHLD},
+ {"ishst", ISHST},
+ {"ish", ISH},
+ {"ld", LD},
+ {"st", ST},
+ {"sy", SY}
+};
+
+A64DB::DBarrierMapper::DBarrierMapper()
+ : NamedImmMapper(DBarrierPairs, 16u) {}
+
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+ {"zva", ZVA},
+ {"ivac", IVAC},
+ {"isw", ISW},
+ {"cvac", CVAC},
+ {"csw", CSW},
+ {"cvau", CVAU},
+ {"civac", CIVAC},
+ {"cisw", CISW}
+};
+
+A64DC::DCMapper::DCMapper()
+ : NamedImmMapper(DCPairs, 0) {}
+
+const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+ {"ialluis", IALLUIS},
+ {"iallu", IALLU},
+ {"ivau", IVAU}
+};
+
+A64IC::ICMapper::ICMapper()
+ : NamedImmMapper(ICPairs, 0) {}
+
+const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+ {"sy", SY},
+};
+
+A64ISB::ISBMapper::ISBMapper()
+ : NamedImmMapper(ISBPairs, 16) {}
+
+const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+ {"pldl1keep", PLDL1KEEP},
+ {"pldl1strm", PLDL1STRM},
+ {"pldl2keep", PLDL2KEEP},
+ {"pldl2strm", PLDL2STRM},
+ {"pldl3keep", PLDL3KEEP},
+ {"pldl3strm", PLDL3STRM},
+ {"plil1keep", PLIL1KEEP},
+ {"plil1strm", PLIL1STRM},
+ {"plil2keep", PLIL2KEEP},
+ {"plil2strm", PLIL2STRM},
+ {"plil3keep", PLIL3KEEP},
+ {"plil3strm", PLIL3STRM},
+ {"pstl1keep", PSTL1KEEP},
+ {"pstl1strm", PSTL1STRM},
+ {"pstl2keep", PSTL2KEEP},
+ {"pstl2strm", PSTL2STRM},
+ {"pstl3keep", PSTL3KEEP},
+ {"pstl3strm", PSTL3STRM}
+};
+
+A64PRFM::PRFMMapper::PRFMMapper()
+ : NamedImmMapper(PRFMPairs, 32) {}
+
+const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+ {"spsel", SPSel},
+ {"daifset", DAIFSet},
+ {"daifclr", DAIFClr}
+};
+
+A64PState::PStateMapper::PStateMapper()
+ : NamedImmMapper(PStatePairs, 0) {}
+
+const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+ {"mdccsr_el0", MDCCSR_EL0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0},
+ {"mdrar_el1", MDRAR_EL1},
+ {"oslsr_el1", OSLSR_EL1},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
+ {"pmceid0_el0", PMCEID0_EL0},
+ {"pmceid1_el0", PMCEID1_EL0},
+ {"midr_el1", MIDR_EL1},
+ {"ccsidr_el1", CCSIDR_EL1},
+ {"clidr_el1", CLIDR_EL1},
+ {"ctr_el0", CTR_EL0},
+ {"mpidr_el1", MPIDR_EL1},
+ {"revidr_el1", REVIDR_EL1},
+ {"aidr_el1", AIDR_EL1},
+ {"dczid_el0", DCZID_EL0},
+ {"id_pfr0_el1", ID_PFR0_EL1},
+ {"id_pfr1_el1", ID_PFR1_EL1},
+ {"id_dfr0_el1", ID_DFR0_EL1},
+ {"id_afr0_el1", ID_AFR0_EL1},
+ {"id_mmfr0_el1", ID_MMFR0_EL1},
+ {"id_mmfr1_el1", ID_MMFR1_EL1},
+ {"id_mmfr2_el1", ID_MMFR2_EL1},
+ {"id_mmfr3_el1", ID_MMFR3_EL1},
+ {"id_isar0_el1", ID_ISAR0_EL1},
+ {"id_isar1_el1", ID_ISAR1_EL1},
+ {"id_isar2_el1", ID_ISAR2_EL1},
+ {"id_isar3_el1", ID_ISAR3_EL1},
+ {"id_isar4_el1", ID_ISAR4_EL1},
+ {"id_isar5_el1", ID_ISAR5_EL1},
+ {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"mvfr0_el1", MVFR0_EL1},
+ {"mvfr1_el1", MVFR1_EL1},
+ {"mvfr2_el1", MVFR2_EL1},
+ {"rvbar_el1", RVBAR_EL1},
+ {"rvbar_el2", RVBAR_EL2},
+ {"rvbar_el3", RVBAR_EL3},
+ {"isr_el1", ISR_EL1},
+ {"cntpct_el0", CNTPCT_EL0},
+ {"cntvct_el0", CNTVCT_EL0},
+
+ // Trace registers
+ {"trcstatr", TRCSTATR},
+ {"trcidr8", TRCIDR8},
+ {"trcidr9", TRCIDR9},
+ {"trcidr10", TRCIDR10},
+ {"trcidr11", TRCIDR11},
+ {"trcidr12", TRCIDR12},
+ {"trcidr13", TRCIDR13},
+ {"trcidr0", TRCIDR0},
+ {"trcidr1", TRCIDR1},
+ {"trcidr2", TRCIDR2},
+ {"trcidr3", TRCIDR3},
+ {"trcidr4", TRCIDR4},
+ {"trcidr5", TRCIDR5},
+ {"trcidr6", TRCIDR6},
+ {"trcidr7", TRCIDR7},
+ {"trcoslsr", TRCOSLSR},
+ {"trcpdsr", TRCPDSR},
+ {"trcdevaff0", TRCDEVAFF0},
+ {"trcdevaff1", TRCDEVAFF1},
+ {"trclsr", TRCLSR},
+ {"trcauthstatus", TRCAUTHSTATUS},
+ {"trcdevarch", TRCDEVARCH},
+ {"trcdevid", TRCDEVID},
+ {"trcdevtype", TRCDEVTYPE},
+ {"trcpidr4", TRCPIDR4},
+ {"trcpidr5", TRCPIDR5},
+ {"trcpidr6", TRCPIDR6},
+ {"trcpidr7", TRCPIDR7},
+ {"trcpidr0", TRCPIDR0},
+ {"trcpidr1", TRCPIDR1},
+ {"trcpidr2", TRCPIDR2},
+ {"trcpidr3", TRCPIDR3},
+ {"trccidr0", TRCCIDR0},
+ {"trccidr1", TRCCIDR1},
+ {"trccidr2", TRCCIDR2},
+ {"trccidr3", TRCCIDR3},
+
+ // GICv3 registers
+ {"icc_iar1_el1", ICC_IAR1_EL1},
+ {"icc_iar0_el1", ICC_IAR0_EL1},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1},
+ {"icc_rpr_el1", ICC_RPR_EL1},
+ {"ich_vtr_el2", ICH_VTR_EL2},
+ {"ich_eisr_el2", ICH_EISR_EL2},
+ {"ich_elsr_el2", ICH_ELSR_EL2}
+};
+
+A64SysReg::MRSMapper::MRSMapper() {
+ InstPairs = &MRSPairs[0];
+ NumInstPairs = llvm::array_lengthof(MRSPairs);
+}
+
+const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0},
+ {"oslar_el1", OSLAR_EL1},
+ {"pmswinc_el0", PMSWINC_EL0},
+
+ // Trace registers
+ {"trcoslar", TRCOSLAR},
+ {"trclar", TRCLAR},
+
+ // GICv3 registers
+ {"icc_eoir1_el1", ICC_EOIR1_EL1},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1},
+ {"icc_dir_el1", ICC_DIR_EL1},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+};
+
+A64SysReg::MSRMapper::MSRMapper() {
+ InstPairs = &MSRPairs[0];
+ NumInstPairs = llvm::array_lengthof(MSRPairs);
+}
+
+
+const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1},
+ {"osdtrtx_el1", OSDTRTX_EL1},
+ {"teecr32_el1", TEECR32_EL1},
+ {"mdccint_el1", MDCCINT_EL1},
+ {"mdscr_el1", MDSCR_EL1},
+ {"dbgdtr_el0", DBGDTR_EL0},
+ {"oseccr_el1", OSECCR_EL1},
+ {"dbgvcr32_el2", DBGVCR32_EL2},
+ {"dbgbvr0_el1", DBGBVR0_EL1},
+ {"dbgbvr1_el1", DBGBVR1_EL1},
+ {"dbgbvr2_el1", DBGBVR2_EL1},
+ {"dbgbvr3_el1", DBGBVR3_EL1},
+ {"dbgbvr4_el1", DBGBVR4_EL1},
+ {"dbgbvr5_el1", DBGBVR5_EL1},
+ {"dbgbvr6_el1", DBGBVR6_EL1},
+ {"dbgbvr7_el1", DBGBVR7_EL1},
+ {"dbgbvr8_el1", DBGBVR8_EL1},
+ {"dbgbvr9_el1", DBGBVR9_EL1},
+ {"dbgbvr10_el1", DBGBVR10_EL1},
+ {"dbgbvr11_el1", DBGBVR11_EL1},
+ {"dbgbvr12_el1", DBGBVR12_EL1},
+ {"dbgbvr13_el1", DBGBVR13_EL1},
+ {"dbgbvr14_el1", DBGBVR14_EL1},
+ {"dbgbvr15_el1", DBGBVR15_EL1},
+ {"dbgbcr0_el1", DBGBCR0_EL1},
+ {"dbgbcr1_el1", DBGBCR1_EL1},
+ {"dbgbcr2_el1", DBGBCR2_EL1},
+ {"dbgbcr3_el1", DBGBCR3_EL1},
+ {"dbgbcr4_el1", DBGBCR4_EL1},
+ {"dbgbcr5_el1", DBGBCR5_EL1},
+ {"dbgbcr6_el1", DBGBCR6_EL1},
+ {"dbgbcr7_el1", DBGBCR7_EL1},
+ {"dbgbcr8_el1", DBGBCR8_EL1},
+ {"dbgbcr9_el1", DBGBCR9_EL1},
+ {"dbgbcr10_el1", DBGBCR10_EL1},
+ {"dbgbcr11_el1", DBGBCR11_EL1},
+ {"dbgbcr12_el1", DBGBCR12_EL1},
+ {"dbgbcr13_el1", DBGBCR13_EL1},
+ {"dbgbcr14_el1", DBGBCR14_EL1},
+ {"dbgbcr15_el1", DBGBCR15_EL1},
+ {"dbgwvr0_el1", DBGWVR0_EL1},
+ {"dbgwvr1_el1", DBGWVR1_EL1},
+ {"dbgwvr2_el1", DBGWVR2_EL1},
+ {"dbgwvr3_el1", DBGWVR3_EL1},
+ {"dbgwvr4_el1", DBGWVR4_EL1},
+ {"dbgwvr5_el1", DBGWVR5_EL1},
+ {"dbgwvr6_el1", DBGWVR6_EL1},
+ {"dbgwvr7_el1", DBGWVR7_EL1},
+ {"dbgwvr8_el1", DBGWVR8_EL1},
+ {"dbgwvr9_el1", DBGWVR9_EL1},
+ {"dbgwvr10_el1", DBGWVR10_EL1},
+ {"dbgwvr11_el1", DBGWVR11_EL1},
+ {"dbgwvr12_el1", DBGWVR12_EL1},
+ {"dbgwvr13_el1", DBGWVR13_EL1},
+ {"dbgwvr14_el1", DBGWVR14_EL1},
+ {"dbgwvr15_el1", DBGWVR15_EL1},
+ {"dbgwcr0_el1", DBGWCR0_EL1},
+ {"dbgwcr1_el1", DBGWCR1_EL1},
+ {"dbgwcr2_el1", DBGWCR2_EL1},
+ {"dbgwcr3_el1", DBGWCR3_EL1},
+ {"dbgwcr4_el1", DBGWCR4_EL1},
+ {"dbgwcr5_el1", DBGWCR5_EL1},
+ {"dbgwcr6_el1", DBGWCR6_EL1},
+ {"dbgwcr7_el1", DBGWCR7_EL1},
+ {"dbgwcr8_el1", DBGWCR8_EL1},
+ {"dbgwcr9_el1", DBGWCR9_EL1},
+ {"dbgwcr10_el1", DBGWCR10_EL1},
+ {"dbgwcr11_el1", DBGWCR11_EL1},
+ {"dbgwcr12_el1", DBGWCR12_EL1},
+ {"dbgwcr13_el1", DBGWCR13_EL1},
+ {"dbgwcr14_el1", DBGWCR14_EL1},
+ {"dbgwcr15_el1", DBGWCR15_EL1},
+ {"teehbr32_el1", TEEHBR32_EL1},
+ {"osdlr_el1", OSDLR_EL1},
+ {"dbgprcr_el1", DBGPRCR_EL1},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
+ {"csselr_el1", CSSELR_EL1},
+ {"vpidr_el2", VPIDR_EL2},
+ {"vmpidr_el2", VMPIDR_EL2},
+ {"sctlr_el1", SCTLR_EL1},
+ {"sctlr_el2", SCTLR_EL2},
+ {"sctlr_el3", SCTLR_EL3},
+ {"actlr_el1", ACTLR_EL1},
+ {"actlr_el2", ACTLR_EL2},
+ {"actlr_el3", ACTLR_EL3},
+ {"cpacr_el1", CPACR_EL1},
+ {"hcr_el2", HCR_EL2},
+ {"scr_el3", SCR_EL3},
+ {"mdcr_el2", MDCR_EL2},
+ {"sder32_el3", SDER32_EL3},
+ {"cptr_el2", CPTR_EL2},
+ {"cptr_el3", CPTR_EL3},
+ {"hstr_el2", HSTR_EL2},
+ {"hacr_el2", HACR_EL2},
+ {"mdcr_el3", MDCR_EL3},
+ {"ttbr0_el1", TTBR0_EL1},
+ {"ttbr0_el2", TTBR0_EL2},
+ {"ttbr0_el3", TTBR0_EL3},
+ {"ttbr1_el1", TTBR1_EL1},
+ {"tcr_el1", TCR_EL1},
+ {"tcr_el2", TCR_EL2},
+ {"tcr_el3", TCR_EL3},
+ {"vttbr_el2", VTTBR_EL2},
+ {"vtcr_el2", VTCR_EL2},
+ {"dacr32_el2", DACR32_EL2},
+ {"spsr_el1", SPSR_EL1},
+ {"spsr_el2", SPSR_EL2},
+ {"spsr_el3", SPSR_EL3},
+ {"elr_el1", ELR_EL1},
+ {"elr_el2", ELR_EL2},
+ {"elr_el3", ELR_EL3},
+ {"sp_el0", SP_EL0},
+ {"sp_el1", SP_EL1},
+ {"sp_el2", SP_EL2},
+ {"spsel", SPSel},
+ {"nzcv", NZCV},
+ {"daif", DAIF},
+ {"currentel", CurrentEL},
+ {"spsr_irq", SPSR_irq},
+ {"spsr_abt", SPSR_abt},
+ {"spsr_und", SPSR_und},
+ {"spsr_fiq", SPSR_fiq},
+ {"fpcr", FPCR},
+ {"fpsr", FPSR},
+ {"dspsr_el0", DSPSR_EL0},
+ {"dlr_el0", DLR_EL0},
+ {"ifsr32_el2", IFSR32_EL2},
+ {"afsr0_el1", AFSR0_EL1},
+ {"afsr0_el2", AFSR0_EL2},
+ {"afsr0_el3", AFSR0_EL3},
+ {"afsr1_el1", AFSR1_EL1},
+ {"afsr1_el2", AFSR1_EL2},
+ {"afsr1_el3", AFSR1_EL3},
+ {"esr_el1", ESR_EL1},
+ {"esr_el2", ESR_EL2},
+ {"esr_el3", ESR_EL3},
+ {"fpexc32_el2", FPEXC32_EL2},
+ {"far_el1", FAR_EL1},
+ {"far_el2", FAR_EL2},
+ {"far_el3", FAR_EL3},
+ {"hpfar_el2", HPFAR_EL2},
+ {"par_el1", PAR_EL1},
+ {"pmcr_el0", PMCR_EL0},
+ {"pmcntenset_el0", PMCNTENSET_EL0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0},
+ {"pmovsclr_el0", PMOVSCLR_EL0},
+ {"pmselr_el0", PMSELR_EL0},
+ {"pmccntr_el0", PMCCNTR_EL0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0},
+ {"pmuserenr_el0", PMUSERENR_EL0},
+ {"pmintenset_el1", PMINTENSET_EL1},
+ {"pmintenclr_el1", PMINTENCLR_EL1},
+ {"pmovsset_el0", PMOVSSET_EL0},
+ {"mair_el1", MAIR_EL1},
+ {"mair_el2", MAIR_EL2},
+ {"mair_el3", MAIR_EL3},
+ {"amair_el1", AMAIR_EL1},
+ {"amair_el2", AMAIR_EL2},
+ {"amair_el3", AMAIR_EL3},
+ {"vbar_el1", VBAR_EL1},
+ {"vbar_el2", VBAR_EL2},
+ {"vbar_el3", VBAR_EL3},
+ {"rmr_el1", RMR_EL1},
+ {"rmr_el2", RMR_EL2},
+ {"rmr_el3", RMR_EL3},
+ {"contextidr_el1", CONTEXTIDR_EL1},
+ {"tpidr_el0", TPIDR_EL0},
+ {"tpidr_el2", TPIDR_EL2},
+ {"tpidr_el3", TPIDR_EL3},
+ {"tpidrro_el0", TPIDRRO_EL0},
+ {"tpidr_el1", TPIDR_EL1},
+ {"cntfrq_el0", CNTFRQ_EL0},
+ {"cntvoff_el2", CNTVOFF_EL2},
+ {"cntkctl_el1", CNTKCTL_EL1},
+ {"cnthctl_el2", CNTHCTL_EL2},
+ {"cntp_tval_el0", CNTP_TVAL_EL0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1},
+ {"cntp_ctl_el0", CNTP_CTL_EL0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1},
+ {"cntp_cval_el0", CNTP_CVAL_EL0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1},
+ {"cntv_tval_el0", CNTV_TVAL_EL0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0},
+
+ // Trace registers
+ {"trcprgctlr", TRCPRGCTLR},
+ {"trcprocselr", TRCPROCSELR},
+ {"trcconfigr", TRCCONFIGR},
+ {"trcauxctlr", TRCAUXCTLR},
+ {"trceventctl0r", TRCEVENTCTL0R},
+ {"trceventctl1r", TRCEVENTCTL1R},
+ {"trcstallctlr", TRCSTALLCTLR},
+ {"trctsctlr", TRCTSCTLR},
+ {"trcsyncpr", TRCSYNCPR},
+ {"trcccctlr", TRCCCCTLR},
+ {"trcbbctlr", TRCBBCTLR},
+ {"trctraceidr", TRCTRACEIDR},
+ {"trcqctlr", TRCQCTLR},
+ {"trcvictlr", TRCVICTLR},
+ {"trcviiectlr", TRCVIIECTLR},
+ {"trcvissctlr", TRCVISSCTLR},
+ {"trcvipcssctlr", TRCVIPCSSCTLR},
+ {"trcvdctlr", TRCVDCTLR},
+ {"trcvdsacctlr", TRCVDSACCTLR},
+ {"trcvdarcctlr", TRCVDARCCTLR},
+ {"trcseqevr0", TRCSEQEVR0},
+ {"trcseqevr1", TRCSEQEVR1},
+ {"trcseqevr2", TRCSEQEVR2},
+ {"trcseqrstevr", TRCSEQRSTEVR},
+ {"trcseqstr", TRCSEQSTR},
+ {"trcextinselr", TRCEXTINSELR},
+ {"trccntrldvr0", TRCCNTRLDVR0},
+ {"trccntrldvr1", TRCCNTRLDVR1},
+ {"trccntrldvr2", TRCCNTRLDVR2},
+ {"trccntrldvr3", TRCCNTRLDVR3},
+ {"trccntctlr0", TRCCNTCTLR0},
+ {"trccntctlr1", TRCCNTCTLR1},
+ {"trccntctlr2", TRCCNTCTLR2},
+ {"trccntctlr3", TRCCNTCTLR3},
+ {"trccntvr0", TRCCNTVR0},
+ {"trccntvr1", TRCCNTVR1},
+ {"trccntvr2", TRCCNTVR2},
+ {"trccntvr3", TRCCNTVR3},
+ {"trcimspec0", TRCIMSPEC0},
+ {"trcimspec1", TRCIMSPEC1},
+ {"trcimspec2", TRCIMSPEC2},
+ {"trcimspec3", TRCIMSPEC3},
+ {"trcimspec4", TRCIMSPEC4},
+ {"trcimspec5", TRCIMSPEC5},
+ {"trcimspec6", TRCIMSPEC6},
+ {"trcimspec7", TRCIMSPEC7},
+ {"trcrsctlr2", TRCRSCTLR2},
+ {"trcrsctlr3", TRCRSCTLR3},
+ {"trcrsctlr4", TRCRSCTLR4},
+ {"trcrsctlr5", TRCRSCTLR5},
+ {"trcrsctlr6", TRCRSCTLR6},
+ {"trcrsctlr7", TRCRSCTLR7},
+ {"trcrsctlr8", TRCRSCTLR8},
+ {"trcrsctlr9", TRCRSCTLR9},
+ {"trcrsctlr10", TRCRSCTLR10},
+ {"trcrsctlr11", TRCRSCTLR11},
+ {"trcrsctlr12", TRCRSCTLR12},
+ {"trcrsctlr13", TRCRSCTLR13},
+ {"trcrsctlr14", TRCRSCTLR14},
+ {"trcrsctlr15", TRCRSCTLR15},
+ {"trcrsctlr16", TRCRSCTLR16},
+ {"trcrsctlr17", TRCRSCTLR17},
+ {"trcrsctlr18", TRCRSCTLR18},
+ {"trcrsctlr19", TRCRSCTLR19},
+ {"trcrsctlr20", TRCRSCTLR20},
+ {"trcrsctlr21", TRCRSCTLR21},
+ {"trcrsctlr22", TRCRSCTLR22},
+ {"trcrsctlr23", TRCRSCTLR23},
+ {"trcrsctlr24", TRCRSCTLR24},
+ {"trcrsctlr25", TRCRSCTLR25},
+ {"trcrsctlr26", TRCRSCTLR26},
+ {"trcrsctlr27", TRCRSCTLR27},
+ {"trcrsctlr28", TRCRSCTLR28},
+ {"trcrsctlr29", TRCRSCTLR29},
+ {"trcrsctlr30", TRCRSCTLR30},
+ {"trcrsctlr31", TRCRSCTLR31},
+ {"trcssccr0", TRCSSCCR0},
+ {"trcssccr1", TRCSSCCR1},
+ {"trcssccr2", TRCSSCCR2},
+ {"trcssccr3", TRCSSCCR3},
+ {"trcssccr4", TRCSSCCR4},
+ {"trcssccr5", TRCSSCCR5},
+ {"trcssccr6", TRCSSCCR6},
+ {"trcssccr7", TRCSSCCR7},
+ {"trcsscsr0", TRCSSCSR0},
+ {"trcsscsr1", TRCSSCSR1},
+ {"trcsscsr2", TRCSSCSR2},
+ {"trcsscsr3", TRCSSCSR3},
+ {"trcsscsr4", TRCSSCSR4},
+ {"trcsscsr5", TRCSSCSR5},
+ {"trcsscsr6", TRCSSCSR6},
+ {"trcsscsr7", TRCSSCSR7},
+ {"trcsspcicr0", TRCSSPCICR0},
+ {"trcsspcicr1", TRCSSPCICR1},
+ {"trcsspcicr2", TRCSSPCICR2},
+ {"trcsspcicr3", TRCSSPCICR3},
+ {"trcsspcicr4", TRCSSPCICR4},
+ {"trcsspcicr5", TRCSSPCICR5},
+ {"trcsspcicr6", TRCSSPCICR6},
+ {"trcsspcicr7", TRCSSPCICR7},
+ {"trcpdcr", TRCPDCR},
+ {"trcacvr0", TRCACVR0},
+ {"trcacvr1", TRCACVR1},
+ {"trcacvr2", TRCACVR2},
+ {"trcacvr3", TRCACVR3},
+ {"trcacvr4", TRCACVR4},
+ {"trcacvr5", TRCACVR5},
+ {"trcacvr6", TRCACVR6},
+ {"trcacvr7", TRCACVR7},
+ {"trcacvr8", TRCACVR8},
+ {"trcacvr9", TRCACVR9},
+ {"trcacvr10", TRCACVR10},
+ {"trcacvr11", TRCACVR11},
+ {"trcacvr12", TRCACVR12},
+ {"trcacvr13", TRCACVR13},
+ {"trcacvr14", TRCACVR14},
+ {"trcacvr15", TRCACVR15},
+ {"trcacatr0", TRCACATR0},
+ {"trcacatr1", TRCACATR1},
+ {"trcacatr2", TRCACATR2},
+ {"trcacatr3", TRCACATR3},
+ {"trcacatr4", TRCACATR4},
+ {"trcacatr5", TRCACATR5},
+ {"trcacatr6", TRCACATR6},
+ {"trcacatr7", TRCACATR7},
+ {"trcacatr8", TRCACATR8},
+ {"trcacatr9", TRCACATR9},
+ {"trcacatr10", TRCACATR10},
+ {"trcacatr11", TRCACATR11},
+ {"trcacatr12", TRCACATR12},
+ {"trcacatr13", TRCACATR13},
+ {"trcacatr14", TRCACATR14},
+ {"trcacatr15", TRCACATR15},
+ {"trcdvcvr0", TRCDVCVR0},
+ {"trcdvcvr1", TRCDVCVR1},
+ {"trcdvcvr2", TRCDVCVR2},
+ {"trcdvcvr3", TRCDVCVR3},
+ {"trcdvcvr4", TRCDVCVR4},
+ {"trcdvcvr5", TRCDVCVR5},
+ {"trcdvcvr6", TRCDVCVR6},
+ {"trcdvcvr7", TRCDVCVR7},
+ {"trcdvcmr0", TRCDVCMR0},
+ {"trcdvcmr1", TRCDVCMR1},
+ {"trcdvcmr2", TRCDVCMR2},
+ {"trcdvcmr3", TRCDVCMR3},
+ {"trcdvcmr4", TRCDVCMR4},
+ {"trcdvcmr5", TRCDVCMR5},
+ {"trcdvcmr6", TRCDVCMR6},
+ {"trcdvcmr7", TRCDVCMR7},
+ {"trccidcvr0", TRCCIDCVR0},
+ {"trccidcvr1", TRCCIDCVR1},
+ {"trccidcvr2", TRCCIDCVR2},
+ {"trccidcvr3", TRCCIDCVR3},
+ {"trccidcvr4", TRCCIDCVR4},
+ {"trccidcvr5", TRCCIDCVR5},
+ {"trccidcvr6", TRCCIDCVR6},
+ {"trccidcvr7", TRCCIDCVR7},
+ {"trcvmidcvr0", TRCVMIDCVR0},
+ {"trcvmidcvr1", TRCVMIDCVR1},
+ {"trcvmidcvr2", TRCVMIDCVR2},
+ {"trcvmidcvr3", TRCVMIDCVR3},
+ {"trcvmidcvr4", TRCVMIDCVR4},
+ {"trcvmidcvr5", TRCVMIDCVR5},
+ {"trcvmidcvr6", TRCVMIDCVR6},
+ {"trcvmidcvr7", TRCVMIDCVR7},
+ {"trccidcctlr0", TRCCIDCCTLR0},
+ {"trccidcctlr1", TRCCIDCCTLR1},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1},
+ {"trcitctrl", TRCITCTRL},
+ {"trcclaimset", TRCCLAIMSET},
+ {"trcclaimclr", TRCCLAIMCLR},
+
+ // GICv3 registers
+ {"icc_bpr1_el1", ICC_BPR1_EL1},
+ {"icc_bpr0_el1", ICC_BPR0_EL1},
+ {"icc_pmr_el1", ICC_PMR_EL1},
+ {"icc_ctlr_el1", ICC_CTLR_EL1},
+ {"icc_ctlr_el3", ICC_CTLR_EL3},
+ {"icc_sre_el1", ICC_SRE_EL1},
+ {"icc_sre_el2", ICC_SRE_EL2},
+ {"icc_sre_el3", ICC_SRE_EL3},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
+ {"icc_seien_el1", ICC_SEIEN_EL1},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2},
+ {"ich_hcr_el2", ICH_HCR_EL2},
+ {"ich_misr_el2", ICH_MISR_EL2},
+ {"ich_vmcr_el2", ICH_VMCR_EL2},
+ {"ich_vseir_el2", ICH_VSEIR_EL2},
+ {"ich_lr0_el2", ICH_LR0_EL2},
+ {"ich_lr1_el2", ICH_LR1_EL2},
+ {"ich_lr2_el2", ICH_LR2_EL2},
+ {"ich_lr3_el2", ICH_LR3_EL2},
+ {"ich_lr4_el2", ICH_LR4_EL2},
+ {"ich_lr5_el2", ICH_LR5_EL2},
+ {"ich_lr6_el2", ICH_LR6_EL2},
+ {"ich_lr7_el2", ICH_LR7_EL2},
+ {"ich_lr8_el2", ICH_LR8_EL2},
+ {"ich_lr9_el2", ICH_LR9_EL2},
+ {"ich_lr10_el2", ICH_LR10_EL2},
+ {"ich_lr11_el2", ICH_LR11_EL2},
+ {"ich_lr12_el2", ICH_LR12_EL2},
+ {"ich_lr13_el2", ICH_LR13_EL2},
+ {"ich_lr14_el2", ICH_LR14_EL2},
+ {"ich_lr15_el2", ICH_LR15_EL2}
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+  // First search the registers shared by all instruction variants.
+ std::string NameLower = Name.lower();
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return SysRegPairs[i].Value;
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Name == NameLower) {
+ Valid = true;
+ return InstPairs[i].Value;
+ }
+ }
+
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+ // are: 11 xxx 1x11 xxxx xxx
+ Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+ SmallVector<StringRef, 4> Ops;
+ if (!GenericRegPattern.match(NameLower, &Ops)) {
+ Valid = false;
+ return -1;
+ }
+
+ uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Bits;
+ Ops[1].getAsInteger(10, Op1);
+ Ops[2].getAsInteger(10, CRn);
+ Ops[3].getAsInteger(10, CRm);
+ Ops[4].getAsInteger(10, Op2);
+ Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+ Valid = true;
+ return Bits;
+}
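+
+// Illustrative worked example: the generic name "s3_0_c15_c0_0" matches the
+// pattern above with Op1 = 0, CRn = 15, CRm = 0 and Op2 = 0, giving
+//   Bits = (3 << 14) | (0 << 11) | (15 << 7) | (0 << 3) | 0 = 0xc780.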
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return SysRegPairs[i].Name;
+ }
+ }
+
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Value == Bits) {
+ Valid = true;
+ return InstPairs[i].Name;
+ }
+ }
+
+ uint32_t Op0 = (Bits >> 14) & 0x3;
+ uint32_t Op1 = (Bits >> 11) & 0x7;
+ uint32_t CRn = (Bits >> 7) & 0xf;
+ uint32_t CRm = (Bits >> 3) & 0xf;
+ uint32_t Op2 = Bits & 0x7;
+
+ // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+ // name.
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+ Valid = false;
+ return "";
+ }
+
+ assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+ Valid = true;
+ return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
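+
+// Going the other way, 0xc780 decodes to Op0 = 3, Op1 = 0, CRn = 15,
+// CRm = 0, Op2 = 0 and (assuming it appears in none of the named tables
+// above) prints as the generic form "s3_0_c15_c0_0".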
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+ {"ipas2e1is", IPAS2E1IS},
+ {"ipas2le1is", IPAS2LE1IS},
+ {"vmalle1is", VMALLE1IS},
+ {"alle2is", ALLE2IS},
+ {"alle3is", ALLE3IS},
+ {"vae1is", VAE1IS},
+ {"vae2is", VAE2IS},
+ {"vae3is", VAE3IS},
+ {"aside1is", ASIDE1IS},
+ {"vaae1is", VAAE1IS},
+ {"alle1is", ALLE1IS},
+ {"vale1is", VALE1IS},
+ {"vale2is", VALE2IS},
+ {"vale3is", VALE3IS},
+ {"vmalls12e1is", VMALLS12E1IS},
+ {"vaale1is", VAALE1IS},
+ {"ipas2e1", IPAS2E1},
+ {"ipas2le1", IPAS2LE1},
+ {"vmalle1", VMALLE1},
+ {"alle2", ALLE2},
+ {"alle3", ALLE3},
+ {"vae1", VAE1},
+ {"vae2", VAE2},
+ {"vae3", VAE3},
+ {"aside1", ASIDE1},
+ {"vaae1", VAAE1},
+ {"alle1", ALLE1},
+ {"vale1", VALE1},
+ {"vale2", VALE2},
+ {"vale3", VALE3},
+ {"vmalls12e1", VMALLS12E1},
+ {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+ : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+ const fltSemantics &Sem = Val.getSemantics();
+ unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+ uint32_t ExpMask;
+ switch (FracBits) {
+ case 10: // IEEE half-precision
+ ExpMask = 0x1f;
+ break;
+ case 23: // IEEE single-precision
+ ExpMask = 0xff;
+ break;
+ case 52: // IEEE double-precision
+ ExpMask = 0x7ff;
+ break;
+ case 112: // IEEE quad-precision
+    // No immediates are valid for quad precision.
+ return false;
+ default:
+ llvm_unreachable("Only half, single and double precision supported");
+ }
+
+ uint32_t ExpStart = FracBits;
+ uint64_t FracMask = (1ULL << FracBits) - 1;
+
+ uint32_t Sign = Val.isNegative();
+
+  uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
+ uint64_t Fraction = Bits & FracMask;
+ int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+ Exponent -= ExpMask >> 1;
+
+ // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+ // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+ // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
+ uint64_t A64FracStart = FracBits - 4;
+ uint64_t A64FracMask = 0xf;
+
+ // Are there too many fraction bits?
+ if (Fraction & ~(A64FracMask << A64FracStart))
+ return false;
+
+ if (Exponent < -3 || Exponent > 4)
+ return false;
+
+ uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+ uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+ Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+ return true;
+}
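+
+// Illustrative examples: 1.0 has sign 0, exponent 0 and a zero fraction, so
+// PackedExp = (0 + 7) & 0x7 = 7 and Imm8Bits = 0x70; 2.0 (exponent 1) packs
+// to 0x00. 0.0 is rejected because its biased exponent unbiases to far below
+// -3, which is why the FMOV immediate form cannot materialise zero.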
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms | immr | size | R | S |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
+// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
+// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
+// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
+// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
+// | 0 | 11111x | - | | UNALLOCATED | |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits, which is then replicated across the datapath.
+//
+// + Values of 'N', 'imms' and 'immr' which do not match the above table are
+// RESERVED.
+// + If all 's' bits in the imms field are set then the instruction is
+// RESERVED.
+// + The 'x' bits in the 'immr' field are IGNORED.
+
+bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
+ int RepeatWidth;
+ int Rotation = 0;
+ int Num1s = 0;
+
+ // Because there are S+1 ones in the replicated mask, an immediate of all
+ // zeros is not allowed. Filtering it here is probably more efficient.
+ if (Imm == 0) return false;
+
+ for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
+ uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
+ uint64_t ReplicatedMask = Imm & RepeatMask;
+
+ if (ReplicatedMask == 0) continue;
+
+ // First we have to make sure the mask is actually repeated in each slot for
+ // this width-specifier.
+ bool IsReplicatedMask = true;
+ for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
+ if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
+ IsReplicatedMask = false;
+ break;
+ }
+ }
+ if (!IsReplicatedMask) continue;
+
+ // Now we have to work out the amount of rotation needed. The first part of
+ // this calculation is actually independent of RepeatWidth, but the complex
+ // case will depend on it.
+ Rotation = CountTrailingZeros_64(Imm);
+ if (Rotation == 0) {
+      // There were no trailing zeros, which means it's either in place or
+      // there are 1s at each end (e.g. 0x8003 needs rotating).
+ Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
+ : CountLeadingOnes_32(Imm);
+ Rotation = RepeatWidth - Rotation;
+ }
+
+ uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ // Of course, they may not actually be ones, so we have to check that:
+ if (!isMask_64(ReplicatedOnes))
+ continue;
+
+ Num1s = CountTrailingOnes_64(ReplicatedOnes);
+
+ // We know we've got an almost valid encoding (certainly, if this is invalid
+ // no other parameters would work).
+ break;
+ }
+
+ // The encodings which would produce all 1s are RESERVED.
+ if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
+
+ uint32_t N = RepeatWidth == 64;
+ uint32_t ImmR = RepeatWidth - Rotation;
+ uint32_t ImmS = Num1s - 1;
+
+ switch (RepeatWidth) {
+  default: break; // 64-bit and 32-bit widths need no marker bits in ImmS.
+ case 16: ImmS |= 0x20; break; // 10ssss
+ case 8: ImmS |= 0x30; break; // 110sss
+ case 4: ImmS |= 0x38; break; // 1110ss
+ case 2: ImmS |= 0x3c; break; // 11110s
+ }
+
+ Bits = ImmS | (ImmR << 6) | (N << 12);
+
+ return true;
+}
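+
+// Illustrative trace: Imm = 0x00ff00ff00ff00ff replicates the 16-bit element
+// 0x00ff, i.e. eight contiguous ones needing no rotation. Hence N = 0,
+// ImmR = 0 and ImmS = (8 - 1) | 0x20 = 0x27 (the "10ssss" row above), so
+// Bits = 0x027.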
+
+
+bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
+ uint64_t &Imm) {
+ uint32_t N = Bits >> 12;
+ uint32_t ImmR = (Bits >> 6) & 0x3f;
+ uint32_t ImmS = Bits & 0x3f;
+
+ // N=1 encodes a 64-bit replication and is invalid for the 32-bit
+ // instructions.
+ if (RegWidth == 32 && N != 0) return false;
+
+ int Width = 0;
+ if (N == 1)
+ Width = 64;
+ else if ((ImmS & 0x20) == 0)
+ Width = 32;
+ else if ((ImmS & 0x10) == 0)
+ Width = 16;
+ else if ((ImmS & 0x08) == 0)
+ Width = 8;
+ else if ((ImmS & 0x04) == 0)
+ Width = 4;
+ else if ((ImmS & 0x02) == 0)
+ Width = 2;
+ else {
+ // ImmS is 0b11111x: UNALLOCATED
+ return false;
+ }
+
+ int Num1s = (ImmS & (Width - 1)) + 1;
+
+ // All encodings which would map to -1 (signed) are RESERVED.
+ if (Num1s == Width) return false;
+
+ int Rotation = (ImmR & (Width - 1));
+ uint64_t Mask = (1ULL << Num1s) - 1;
+ uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
+
+ Imm = 0;
+ for (unsigned i = 0; i < RegWidth / Width; ++i) {
+ Imm |= Mask;
+ Mask <<= Width;
+ }
+
+ return true;
+}
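+
+// Illustrative decode: Bits = 0x107 unpacks to N = 0, ImmR = 4 and
+// ImmS = 0b000111, so Width = 32 and Num1s = 8. Rotating the mask
+// 0x000000ff right by 4 yields Imm = 0xf000000f on a 32-bit register.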
+
+bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // If high bits are set then a 32-bit MOVZ can't possibly work.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ for (int i = 0; i < RegWidth; i += 16) {
+ // If the value is 0 when we mask out all the bits that could be set with
+ // the current LSL value then it's representable.
+ if ((Value & ~(0xffffULL << i)) == 0) {
+ Shift = i / 16;
+ UImm16 = (Value >> i) & 0xffff;
+ return true;
+ }
+ }
+ return false;
+}
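+
+// For example, Value = 0x0000abcd00000000 on a 64-bit register is zero
+// outside bits [47:32], so it is representable as MOVZ with
+// UImm16 = 0xabcd and Shift = 2 (i.e. LSL #32).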
+
+bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
+
+ // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
+ // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
+ // a valid input for isMOVZImm.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
+
+ return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
+}
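+
+// For example, the 32-bit Value = 0xffff1234 gives
+// MOVZEquivalent = 0x0000edcb, which isMOVZImm accepts with Shift = 0, so
+// the original value is representable as MOVN with UImm16 = 0xedcb.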
+
+bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
+ int &UImm16, int &Shift) {
+ if (isMOVZImm(RegWidth, Value, UImm16, Shift))
+ return false;
+
+ return isMOVNImm(RegWidth, Value, UImm16, Shift);
+}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
new file mode 100644
index 000000000000..1b773d632ebe
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -0,0 +1,1068 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the AArch64 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code generation types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to AArch64 condition codes
+namespace A64CC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ = 0, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Unsigned higher or same >, ==, or unordered
+    LO, // Unsigned lower             Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Ordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Signed greater than Greater than
+ LE, // Signed less than or equal <, ==, or unordered
+ AL, // Always (unconditional) Always (unconditional)
+ NV, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution
+ // is "always".
+ Invalid
+ };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case A64CC::EQ: return "eq";
+ case A64CC::NE: return "ne";
+ case A64CC::HS: return "hs";
+ case A64CC::LO: return "lo";
+ case A64CC::MI: return "mi";
+ case A64CC::PL: return "pl";
+ case A64CC::VS: return "vs";
+ case A64CC::VC: return "vc";
+ case A64CC::HI: return "hi";
+ case A64CC::LS: return "ls";
+ case A64CC::GE: return "ge";
+ case A64CC::LT: return "lt";
+ case A64CC::GT: return "gt";
+ case A64CC::LE: return "le";
+ case A64CC::AL: return "al";
+ case A64CC::NV: return "nv";
+ }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+ return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+ .Case("eq", A64CC::EQ)
+ .Case("ne", A64CC::NE)
+ .Case("ne", A64CC::NE)
+ .Case("hs", A64CC::HS)
+ .Case("cs", A64CC::HS)
+ .Case("lo", A64CC::LO)
+ .Case("cc", A64CC::LO)
+ .Case("mi", A64CC::MI)
+ .Case("pl", A64CC::PL)
+ .Case("vs", A64CC::VS)
+ .Case("vc", A64CC::VC)
+ .Case("hi", A64CC::HI)
+ .Case("ls", A64CC::LS)
+ .Case("ge", A64CC::GE)
+ .Case("lt", A64CC::LT)
+ .Case("gt", A64CC::GT)
+ .Case("le", A64CC::LE)
+ .Case("al", A64CC::AL)
+ .Case("nv", A64CC::NV)
+ .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+  // The condition codes are designed so that reversing the sense of a
+  // condition only requires inverting its low bit.
+
+ return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
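+
+// For example EQ (0b0000) inverts to NE (0b0001), and GE (0b1010) to
+// LT (0b1011).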
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
+struct NamedImmMapper {
+ struct Mapping {
+ const char *Name;
+ uint32_t Value;
+ };
+
+ template<int N>
+ NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+ StringRef toString(uint32_t Value, bool &Valid) const;
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+
+  /// Many of the instructions allow an alternative assembly form consisting of
+  /// a simple immediate. Currently the only valid forms are the ranges [0, N),
+  /// where N == 0 indicates that no immediate syntax form is allowed.
+ bool validImm(uint32_t Value) const;
+protected:
+ const Mapping *Pairs;
+ size_t NumPairs;
+ uint32_t TooBigImm;
+};
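+
+// Illustrative usage, based on the tables defined in AArch64BaseInfo.cpp:
+//
+//   A64DB::DBarrierMapper Mapper;
+//   bool Valid;
+//   uint32_t Enc = Mapper.fromString("ish", Valid); // Enc == 0xb
+//   StringRef Name = Mapper.toString(0xb, Valid);   // Name == "ish"
+//   Mapper.validImm(12); // true: barrier operands below 16 may also be
+//                        // written as raw immediates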
+
+namespace A64AT {
+ enum ATValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ S1E1R = 0x43c0, // 01 000 0111 1000 000
+ S1E2R = 0x63c0, // 01 100 0111 1000 000
+ S1E3R = 0x73c0, // 01 110 0111 1000 000
+ S1E1W = 0x43c1, // 01 000 0111 1000 001
+ S1E2W = 0x63c1, // 01 100 0111 1000 001
+ S1E3W = 0x73c1, // 01 110 0111 1000 001
+ S1E0R = 0x43c2, // 01 000 0111 1000 010
+ S1E0W = 0x43c3, // 01 000 0111 1000 011
+ S12E1R = 0x63c4, // 01 100 0111 1000 100
+ S12E1W = 0x63c5, // 01 100 0111 1000 101
+ S12E0R = 0x63c6, // 01 100 0111 1000 110
+ S12E0W = 0x63c7 // 01 100 0111 1000 111
+ };
+
+ struct ATMapper : NamedImmMapper {
+ const static Mapping ATPairs[];
+
+ ATMapper();
+ };
+
+}
+namespace A64DB {
+ enum DBValues {
+ Invalid = -1,
+ OSHLD = 0x1,
+ OSHST = 0x2,
+ OSH = 0x3,
+ NSHLD = 0x5,
+ NSHST = 0x6,
+ NSH = 0x7,
+ ISHLD = 0x9,
+ ISHST = 0xa,
+ ISH = 0xb,
+ LD = 0xd,
+ ST = 0xe,
+ SY = 0xf
+ };
+
+ struct DBarrierMapper : NamedImmMapper {
+ const static Mapping DBarrierPairs[];
+
+ DBarrierMapper();
+ };
+}
+
+namespace A64DC {
+ enum DCValues {
+    Invalid = -1, // Op0 Op1  CRn   CRm   Op2
+ ZVA = 0x5ba1, // 01 011 0111 0100 001
+ IVAC = 0x43b1, // 01 000 0111 0110 001
+ ISW = 0x43b2, // 01 000 0111 0110 010
+ CVAC = 0x5bd1, // 01 011 0111 1010 001
+ CSW = 0x43d2, // 01 000 0111 1010 010
+ CVAU = 0x5bd9, // 01 011 0111 1011 001
+ CIVAC = 0x5bf1, // 01 011 0111 1110 001
+ CISW = 0x43f2 // 01 000 0111 1110 010
+ };
+
+ struct DCMapper : NamedImmMapper {
+ const static Mapping DCPairs[];
+
+ DCMapper();
+ };
+
+}
+
+namespace A64IC {
+ enum ICValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ IALLUIS = 0x0388, // 000 0111 0001 000
+ IALLU = 0x03a8, // 000 0111 0101 000
+ IVAU = 0x1ba9 // 011 0111 0101 001
+ };
+
+
+ struct ICMapper : NamedImmMapper {
+ const static Mapping ICPairs[];
+
+ ICMapper();
+ };
+
+ static inline bool NeedsRegister(ICValues Val) {
+ return Val == IVAU;
+ }
+}
+
+namespace A64ISB {
+ enum ISBValues {
+ Invalid = -1,
+ SY = 0xf
+ };
+ struct ISBMapper : NamedImmMapper {
+ const static Mapping ISBPairs[];
+
+ ISBMapper();
+ };
+}
+
+namespace A64PRFM {
+ enum PRFMValues {
+ Invalid = -1,
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+ };
+
+ struct PRFMMapper : NamedImmMapper {
+ const static Mapping PRFMPairs[];
+
+ PRFMMapper();
+ };
+}
+
+namespace A64PState {
+ enum PStateValues {
+ Invalid = -1,
+ SPSel = 0x05,
+ DAIFSet = 0x1e,
+ DAIFClr = 0x1f
+ };
+
+ struct PStateMapper : NamedImmMapper {
+ const static Mapping PStatePairs[];
+
+ PStateMapper();
+ };
+
+}
+
+namespace A64SE {
+ enum ShiftExtSpecifiers {
+ Invalid = -1,
+ LSL,
+ LSR,
+ ASR,
+ ROR,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX
+ };
+}
+
+namespace A64SysReg {
+ enum SysRegROValues {
+ MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
+ DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
+ MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
+ OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
+ DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
+ PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
+ PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
+ MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
+ CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
+ CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
+ CTR_EL0 = 0xd801, // 11 011 0000 0000 001
+ MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
+ REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
+ AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
+ DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
+ ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
+ ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
+ ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
+ ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
+ ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
+ ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
+ ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
+ ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
+ ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
+ ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
+ ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
+ ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
+ ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
+ ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
+ ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
+ MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
+ RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
+ RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
+ RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
+ ISR_EL1 = 0xc608, // 11 000 1100 0001 000
+ CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
+ CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010
+
+ // Trace registers
+ TRCSTATR = 0x8818, // 10 001 0000 0011 000
+ TRCIDR8 = 0x8806, // 10 001 0000 0000 110
+ TRCIDR9 = 0x880e, // 10 001 0000 0001 110
+ TRCIDR10 = 0x8816, // 10 001 0000 0010 110
+ TRCIDR11 = 0x881e, // 10 001 0000 0011 110
+ TRCIDR12 = 0x8826, // 10 001 0000 0100 110
+ TRCIDR13 = 0x882e, // 10 001 0000 0101 110
+ TRCIDR0 = 0x8847, // 10 001 0000 1000 111
+ TRCIDR1 = 0x884f, // 10 001 0000 1001 111
+ TRCIDR2 = 0x8857, // 10 001 0000 1010 111
+ TRCIDR3 = 0x885f, // 10 001 0000 1011 111
+ TRCIDR4 = 0x8867, // 10 001 0000 1100 111
+ TRCIDR5 = 0x886f, // 10 001 0000 1101 111
+ TRCIDR6 = 0x8877, // 10 001 0000 1110 111
+ TRCIDR7 = 0x887f, // 10 001 0000 1111 111
+ TRCOSLSR = 0x888c, // 10 001 0001 0001 100
+ TRCPDSR = 0x88ac, // 10 001 0001 0101 100
+ TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110
+ TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110
+ TRCLSR = 0x8bee, // 10 001 0111 1101 110
+ TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110
+ TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110
+ TRCDEVID = 0x8b97, // 10 001 0111 0010 111
+ TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111
+ TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111
+ TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111
+ TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111
+ TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111
+ TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111
+ TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111
+ TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111
+ TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111
+ TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111
+ TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111
+ TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111
+ TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111
+
+ // GICv3 registers
+ ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000
+ ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000
+ ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010
+ ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010
+ ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011
+ ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001
+ ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011
+ ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101
+ };
+
+ enum SysRegWOValues {
+ DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
+ OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
+ PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100
+
+ // Trace Registers
+ TRCOSLAR = 0x8884, // 10 001 0001 0000 100
+ TRCLAR = 0x8be6, // 10 001 0111 1100 110
+
+ // GICv3 registers
+ ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001
+ ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001
+ ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001
+ ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101
+ ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110
+ ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111
+ };
+
+ enum SysRegValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
+ OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
+ TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
+ MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
+ MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
+ DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
+ OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
+ DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
+ DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
+ DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
+ DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
+ DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
+ DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
+ DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
+ DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
+ DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
+ DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
+ DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
+ DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
+ DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
+ DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
+ DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
+ DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
+ DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
+ DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
+ DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
+ DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
+ DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
+ DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
+ DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
+ DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
+ DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
+ DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
+ DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
+ DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
+ DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
+ DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
+ DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
+ DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
+ DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
+ DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
+ DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
+ DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
+ DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
+ DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
+ DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
+ DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
+ DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
+ DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
+ DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
+ DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
+ DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
+ DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
+ DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
+ DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
+ DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
+ DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
+ DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
+ DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
+ DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
+ DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
+ DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
+ DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
+ DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
+ DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
+ DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
+ DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
+ DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
+ DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
+ DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
+ DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
+ DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
+ TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
+ OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
+ DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
+ DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
+ DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
+ CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
+ VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
+ VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
+ CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
+ SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
+ SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
+ SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
+ ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
+ ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
+ ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
+ HCR_EL2 = 0xe088, // 11 100 0001 0001 000
+ SCR_EL3 = 0xf088, // 11 110 0001 0001 000
+ MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
+ SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
+ CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
+ CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
+ HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
+ HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
+ MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
+ TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
+ TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
+ TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
+ TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
+ TCR_EL1 = 0xc102, // 11 000 0010 0000 010
+ TCR_EL2 = 0xe102, // 11 100 0010 0000 010
+ TCR_EL3 = 0xf102, // 11 110 0010 0000 010
+ VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
+ VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
+ DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
+ SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
+ SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
+ SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
+ ELR_EL1 = 0xc201, // 11 000 0100 0000 001
+ ELR_EL2 = 0xe201, // 11 100 0100 0000 001
+ ELR_EL3 = 0xf201, // 11 110 0100 0000 001
+ SP_EL0 = 0xc208, // 11 000 0100 0001 000
+ SP_EL1 = 0xe208, // 11 100 0100 0001 000
+ SP_EL2 = 0xf208, // 11 110 0100 0001 000
+ SPSel = 0xc210, // 11 000 0100 0010 000
+ NZCV = 0xda10, // 11 011 0100 0010 000
+ DAIF = 0xda11, // 11 011 0100 0010 001
+ CurrentEL = 0xc212, // 11 000 0100 0010 010
+ SPSR_irq = 0xe218, // 11 100 0100 0011 000
+ SPSR_abt = 0xe219, // 11 100 0100 0011 001
+ SPSR_und = 0xe21a, // 11 100 0100 0011 010
+ SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
+ FPCR = 0xda20, // 11 011 0100 0100 000
+ FPSR = 0xda21, // 11 011 0100 0100 001
+ DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
+ DLR_EL0 = 0xda29, // 11 011 0100 0101 001
+ IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
+ AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
+ AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
+ AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
+ AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
+ AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
+ AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
+ ESR_EL1 = 0xc290, // 11 000 0101 0010 000
+ ESR_EL2 = 0xe290, // 11 100 0101 0010 000
+ ESR_EL3 = 0xf290, // 11 110 0101 0010 000
+ FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
+ FAR_EL1 = 0xc300, // 11 000 0110 0000 000
+ FAR_EL2 = 0xe300, // 11 100 0110 0000 000
+ FAR_EL3 = 0xf300, // 11 110 0110 0000 000
+ HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
+ PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
+ PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
+ PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
+ PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
+ PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
+ PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
+ PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
+ PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
+ PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
+ PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
+ PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
+ PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
+ PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
+ MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
+ MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
+ MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
+ AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
+ AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
+ AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
+ VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
+ VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
+ VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
+ RMR_EL1 = 0xc602, // 11 000 1100 0000 010
+ RMR_EL2 = 0xe602, // 11 100 1100 0000 010
+ RMR_EL3 = 0xf602, // 11 110 1100 0000 010
+ CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
+ TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
+ TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
+ TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
+ TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
+ TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
+ CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
+ CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
+ CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
+ CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
+ CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
+ CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
+ CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
+ CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
+ CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
+ CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
+ CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
+ CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
+ CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
+ CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
+ CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
+ CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
+ PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
+ PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
+ PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
+ PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
+ PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
+ PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
+ PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
+ PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
+ PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
+ PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
+ PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
+ PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
+ PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
+ PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
+ PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
+ PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
+ PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
+ PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
+ PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
+ PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
+ PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
+ PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
+ PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
+ PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
+ PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
+ PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
+ PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
+ PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
+ PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
+ PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
+ PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
+ PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
+ PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
+ PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
+ PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
+ PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
+ PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
+ PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
+ PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
+ PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
+ PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
+ PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
+ PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
+ PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
+ PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
+ PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
+ PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
+ PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
+ PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
+ PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
+ PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
+ PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
+ PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
+ PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
+ PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
+ PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
+ PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
+ PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
+ PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
+ PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
+ PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
+ PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
+ PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110
+
+ // Trace registers
+ TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000
+ TRCPROCSELR = 0x8810, // 10 001 0000 0010 000
+ TRCCONFIGR = 0x8820, // 10 001 0000 0100 000
+ TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000
+ TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000
+ TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000
+ TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000
+ TRCTSCTLR = 0x8860, // 10 001 0000 1100 000
+ TRCSYNCPR = 0x8868, // 10 001 0000 1101 000
+ TRCCCCTLR = 0x8870, // 10 001 0000 1110 000
+ TRCBBCTLR = 0x8878, // 10 001 0000 1111 000
+ TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001
+ TRCQCTLR = 0x8809, // 10 001 0000 0001 001
+ TRCVICTLR = 0x8802, // 10 001 0000 0000 010
+ TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010
+ TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010
+ TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010
+ TRCVDCTLR = 0x8842, // 10 001 0000 1000 010
+ TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010
+ TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010
+ TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100
+ TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100
+ TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100
+ TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100
+ TRCSEQSTR = 0x883c, // 10 001 0000 0111 100
+ TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100
+ TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101
+ TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101
+ TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101
+ TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101
+ TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101
+ TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101
+ TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101
+ TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101
+ TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101
+ TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101
+ TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101
+ TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101
+ TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111
+ TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111
+ TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111
+ TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111
+ TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111
+ TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111
+ TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111
+ TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111
+ TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000
+ TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000
+ TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000
+ TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000
+ TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000
+ TRCRSCTLR7 = 0x88b8, // 10 001 0001 0111 000
+ TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000
+ TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000
+ TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000
+ TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000
+ TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000
+ TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000
+ TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000
+ TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000
+ TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001
+ TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001
+ TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001
+ TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001
+ TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001
+ TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001
+ TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001
+ TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001
+ TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001
+ TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001
+ TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001
+ TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001
+ TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001
+ TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001
+ TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001
+ TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001
+ TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010
+ TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010
+ TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010
+ TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010
+ TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010
+ TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010
+ TRCSSCCR6 = 0x88b2, // 10 001 0001 0110 010
+ TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010
+ TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010
+ TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010
+ TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010
+ TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010
+ TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010
+ TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010
+ TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010
+ TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010
+ TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011
+ TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011
+ TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011
+ TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011
+ TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011
+ TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011
+ TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011
+ TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011
+ TRCPDCR = 0x88a4, // 10 001 0001 0100 100
+ TRCACVR0 = 0x8900, // 10 001 0010 0000 000
+ TRCACVR1 = 0x8910, // 10 001 0010 0010 000
+ TRCACVR2 = 0x8920, // 10 001 0010 0100 000
+ TRCACVR3 = 0x8930, // 10 001 0010 0110 000
+ TRCACVR4 = 0x8940, // 10 001 0010 1000 000
+ TRCACVR5 = 0x8950, // 10 001 0010 1010 000
+ TRCACVR6 = 0x8960, // 10 001 0010 1100 000
+ TRCACVR7 = 0x8970, // 10 001 0010 1110 000
+ TRCACVR8 = 0x8901, // 10 001 0010 0000 001
+ TRCACVR9 = 0x8911, // 10 001 0010 0010 001
+ TRCACVR10 = 0x8921, // 10 001 0010 0100 001
+ TRCACVR11 = 0x8931, // 10 001 0010 0110 001
+ TRCACVR12 = 0x8941, // 10 001 0010 1000 001
+ TRCACVR13 = 0x8951, // 10 001 0010 1010 001
+ TRCACVR14 = 0x8961, // 10 001 0010 1100 001
+ TRCACVR15 = 0x8971, // 10 001 0010 1110 001
+ TRCACATR0 = 0x8902, // 10 001 0010 0000 010
+ TRCACATR1 = 0x8912, // 10 001 0010 0010 010
+ TRCACATR2 = 0x8922, // 10 001 0010 0100 010
+ TRCACATR3 = 0x8932, // 10 001 0010 0110 010
+ TRCACATR4 = 0x8942, // 10 001 0010 1000 010
+ TRCACATR5 = 0x8952, // 10 001 0010 1010 010
+ TRCACATR6 = 0x8962, // 10 001 0010 1100 010
+ TRCACATR7 = 0x8972, // 10 001 0010 1110 010
+ TRCACATR8 = 0x8903, // 10 001 0010 0000 011
+ TRCACATR9 = 0x8913, // 10 001 0010 0010 011
+ TRCACATR10 = 0x8923, // 10 001 0010 0100 011
+ TRCACATR11 = 0x8933, // 10 001 0010 0110 011
+ TRCACATR12 = 0x8943, // 10 001 0010 1000 011
+ TRCACATR13 = 0x8953, // 10 001 0010 1010 011
+ TRCACATR14 = 0x8963, // 10 001 0010 1100 011
+ TRCACATR15 = 0x8973, // 10 001 0010 1110 011
+ TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100
+ TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100
+ TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100
+ TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100
+ TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101
+ TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101
+ TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101
+ TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101
+ TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110
+ TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110
+ TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110
+ TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110
+ TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111
+ TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111
+ TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111
+ TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111
+ TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000
+ TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000
+ TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000
+ TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000
+ TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000
+ TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000
+ TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000
+ TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000
+ TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001
+ TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001
+ TRCVMIDCVR2 = 0x89a1, // 10 001 0011 0100 001
+ TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001
+ TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001
+ TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001
+ TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001
+ TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001
+ TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010
+ TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010
+ TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010
+ TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010
+ TRCITCTRL = 0x8b84, // 10 001 0111 0000 100
+ TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110
+ TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110
+
+ // GICv3 registers
+ ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011
+ ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011
+ ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000
+ ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100
+ ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100
+ ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101
+ ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101
+ ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101
+ ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110
+ ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111
+ ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111
+ ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000
+ ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100
+ ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101
+ ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110
+ ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111
+ ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000
+ ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001
+ ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010
+ ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011
+ ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000
+ ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001
+ ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010
+ ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011
+ ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000
+ ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001
+ ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010
+ ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011
+ ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000
+ ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010
+ ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111
+ ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100
+ ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000
+ ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001
+ ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010
+ ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011
+ ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100
+ ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101
+ ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110
+ ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111
+ ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000
+ ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001
+ ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010
+ ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011
+ ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100
+ ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
+ ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
+ ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111
+ };
+
+ // Note that these do not inherit from NamedImmMapper. This class is
+ // sufficiently different in its behaviour that I don't believe it's worth
+ // burdening the common NamedImmMapper with abstractions only needed in
+ // this one case.
+ struct SysRegMapper {
+ static const NamedImmMapper::Mapping SysRegPairs[];
+
+ const NamedImmMapper::Mapping *InstPairs;
+ size_t NumInstPairs;
+
+ SysRegMapper() {}
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+ std::string toString(uint32_t Bits, bool &Valid) const;
+ };
+
+ struct MSRMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper();
+ };
+
+ struct MRSMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper();
+ };
+
+ uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
+}
+
+namespace A64TLBI {
+ enum TLBIValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
+ IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
+ VMALLE1IS = 0x4418, // 01 000 1000 0011 000
+ ALLE2IS = 0x6418, // 01 100 1000 0011 000
+ ALLE3IS = 0x7418, // 01 110 1000 0011 000
+ VAE1IS = 0x4419, // 01 000 1000 0011 001
+ VAE2IS = 0x6419, // 01 100 1000 0011 001
+ VAE3IS = 0x7419, // 01 110 1000 0011 001
+ ASIDE1IS = 0x441a, // 01 000 1000 0011 010
+ VAAE1IS = 0x441b, // 01 000 1000 0011 011
+ ALLE1IS = 0x641c, // 01 100 1000 0011 100
+ VALE1IS = 0x441d, // 01 000 1000 0011 101
+ VALE2IS = 0x641d, // 01 100 1000 0011 101
+ VALE3IS = 0x741d, // 01 110 1000 0011 101
+ VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
+ VAALE1IS = 0x441f, // 01 000 1000 0011 111
+ IPAS2E1 = 0x6421, // 01 100 1000 0100 001
+ IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
+ VMALLE1 = 0x4438, // 01 000 1000 0111 000
+ ALLE2 = 0x6438, // 01 100 1000 0111 000
+ ALLE3 = 0x7438, // 01 110 1000 0111 000
+ VAE1 = 0x4439, // 01 000 1000 0111 001
+ VAE2 = 0x6439, // 01 100 1000 0111 001
+ VAE3 = 0x7439, // 01 110 1000 0111 001
+ ASIDE1 = 0x443a, // 01 000 1000 0111 010
+ VAAE1 = 0x443b, // 01 000 1000 0111 011
+ ALLE1 = 0x643c, // 01 100 1000 0111 100
+ VALE1 = 0x443d, // 01 000 1000 0111 101
+ VALE2 = 0x643d, // 01 100 1000 0111 101
+ VALE3 = 0x743d, // 01 110 1000 0111 101
+ VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
+ VAALE1 = 0x443f // 01 000 1000 0111 111
+ };
+
+ struct TLBIMapper : NamedImmMapper {
+ const static Mapping TLBIPairs[];
+
+ TLBIMapper();
+ };
+
+ static inline bool NeedsRegister(TLBIValues Val) {
+ switch (Val) {
+ case VMALLE1IS:
+ case ALLE2IS:
+ case ALLE3IS:
+ case ALLE1IS:
+ case VMALLS12E1IS:
+ case VMALLE1:
+ case ALLE2:
+ case ALLE3:
+ case ALLE1:
+ case VMALLS12E1:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+namespace AArch64II {
+
+ enum TOF {
+ //===--------------------------------------------------------------===//
+ // AArch64 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ // MO_GOT - Represents a relocation referring to the GOT entry of a given
+ // symbol. Used in adrp.
+ MO_GOT,
+
+ // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
+ // GOT entry of a given symbol. Used in ldr only.
+ MO_GOT_LO12,
+
+ // MO_DTPREL_* - Represents a relocation referring to the offset from a
+ // module's dynamic thread pointer. Used in the local-dynamic TLS access
+ // model.
+ MO_DTPREL_G1,
+ MO_DTPREL_G0_NC,
+
+ // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+ // providing the offset of a variable from the thread-pointer. Used in
+ // initial-exec TLS model where this offset is assigned in the static thread
+ // block and thus known by the dynamic linker.
+ MO_GOTTPREL,
+ MO_GOTTPREL_LO12,
+
+ // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
+ // a TLS descriptor chosen by the dynamic linker. Used for the
+ // general-dynamic and local-dynamic TLS access models where very little is
+ // known at link-time.
+ MO_TLSDESC,
+ MO_TLSDESC_LO12,
+
+ // MO_TPREL_* - Represents a relocation referring to the offset of a
+ // variable from the thread pointer itself. Used in the local-exec TLS
+ // access model.
+ MO_TPREL_G1,
+ MO_TPREL_G0_NC,
+
+ // MO_LO12 - On a symbol operand, this represents a relocation containing
+ // the lower 12 bits of the address. Used in add/sub/ldr/str.
+ MO_LO12
+ };
+}
+
+class APFloat;
+
+namespace A64Imms {
+ bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+ inline bool isFPImm(const APFloat &Val) {
+ uint32_t Imm8;
+ return isFPImm(Val, Imm8);
+ }
+
+ bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+ bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+ bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+ bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+ // We sometimes want to know whether the immediate is representable with a
+ // MOVN but *not* with a MOVZ (because that would take priority).
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm;
+
+#endif
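The hexadecimal enumerator values above are nothing more than the five MRS/MSR operand fields from the trailing comments packed into a 16-bit word, and the A64TLBI values use the identical packing. A minimal sketch of the scheme, assuming a hypothetical helper name (packSysReg is not part of this patch):

#include <cstdint>

// Packs the system-register operand fields Op0(2) Op1(3) CRn(4) CRm(4) Op2(3)
// exactly as the A64SysReg and A64TLBI enumerators encode them.
static inline uint16_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                                  unsigned CRm, unsigned Op2) {
  return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

// Worked examples against the enumerators above:
//   VBAR_EL1  "11 000 1100 0000 000" -> packSysReg(3, 0, 12, 0, 0) == 0xc600
//   IPAS2E1IS "01 100 1000 0000 001" -> packSysReg(1, 4,  8, 0, 1) == 0x6401
// In the same spirit, A64Imms::isOnlyMOVNImm exists because a value such as
// 0xffffffffffff1234 (i.e. ~0xedcb) is reachable with a single MOVN but with
// no single MOVZ, and MOVZ would otherwise take priority.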
diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..2c28348d7d81
--- /dev/null
+++ b/lib/Target/AArch64/Utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Utils
+ AArch64BaseInfo.cpp
+ )
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt
index 71e5bbc629ca..1be537598ae5 100644
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/AArch64/Utils/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt -----------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = CellSPUDesc
-parent = CellSPU
-required_libraries = CellSPUInfo MC
-add_to_library_groups = CellSPU
+name = AArch64Utils
+parent = AArch64
+required_libraries = Core Support
+add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile
new file mode 100644
index 000000000000..0f4a64527123
--- /dev/null
+++ b/lib/Target/AArch64/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Utils
+
+# Hack: we need to include the 'main' AArch64 target directory to grab private headers
+#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 000000000000..f0d4dbe2bfb3
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability and which can deliver higher performance than other, potentially
+// more direct, patterns, specifically when one micro-op reads a D-register
+// operand that has recently been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers which
+// are going to be used by DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+ // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if this operand uses a register of the given register class.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+ if (!MO) return ARM::ssub_0;
+ assert(MO->isReg() && "Non register operand found!");
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(DefReg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR (or QPR) register by duplicating one lane of Reg with a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Creates a register of class TRC by copying the given subregister (Lane)
+// of DReg, e.g. extracting a DPR from a QPR.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two DPR registers and creates a QPR by using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+ // merge two SPR values to form a DPR register. In order avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern:
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
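createA15SDOptimizerPass follows LLVM's usual pass-factory convention; the hunk itself does not show where the pass gets scheduled. A minimal sketch of the expected wiring, assuming the ARMPassConfig from ARMTargetMachine.cpp and an isCortexA15() subtarget predicate (both assumptions, neither appears in this hunk):

// Sketch only: ARMPassConfig and isCortexA15() are assumed, not shown here.
bool ARMPassConfig::addPreRegAlloc() {
  // Schedule the pass before register allocation so the virtual registers
  // created by the VDUP/VEXT rewrites can still be coalesced and allocated.
  if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15())
    addPass(createA15SDOptimizerPass());
  return true;
}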
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 1446bbbb8e7c..80e5f37eb086 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
@@ -44,6 +45,9 @@ FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+/// \brief Creates an ARM-specific Target Transformation Info pass.
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
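The new createARMTargetTransformInfoPass declaration mirrors the pattern other targets use to expose their cost models to IR-level passes. A sketch of the usual hookup, assuming ARMBaseTargetMachine overrides the addAnalysisPasses hook (an assumption; the TargetMachine side is not in this hunk):

// Sketch only: assumes the addAnalysisPasses hook on ARMBaseTargetMachine.
void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
  // Register the ImmutablePass so the vectorizer, inliner and friends can
  // query ARM-specific instruction costs.
  PM.add(createARMTargetTransformInfoPass(this));
}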
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 23974ad9052c..68380847a022 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
// Some processors perform return stack prediction. CodeGen should avoid issuing
// "normal" call instructions to callees which do not return.
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
@@ -106,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
"Is microcontroller profile ('M' series)">;
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -132,11 +141,14 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
include "ARMSchedule.td"
// ARM processor families.
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+ "Cortex-A5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
@@ -147,6 +159,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
FeatureHasSlowFPVMLx]>;
// FIXME: It has not been determined if A15 has these features.
@@ -154,6 +167,12 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -219,6 +238,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
+// FIXME: A5 currently has the same schedule model as A8
+def : ProcessorModel<"cortex-a5", CortexA8Model,
+ [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureVFP4, FeatureDSPThumb2,
+ FeatureHasRAS]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
@@ -233,6 +257,11 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
+// FIXME: R5 currently has the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
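Each SubtargetFeature above ties a -mattr name (e.g. +avoid-movs-shop) to the ARMSubtarget member it sets, and the processor definitions such as ProcSwift simply bundle those features. A sketch of the consuming side, assuming a conventional accessor on ARMSubtarget (the accessor is our assumption, not part of this hunk):

// Sketch only: the accessor name is assumed; the field is the one named by
// FeatureAvoidMOVsShOp's second argument above.
bool ARMSubtarget::avoidMOVsShifterOperand() const {
  // True for Swift (via ProcSwift) or when -mattr=+avoid-movs-shop is given.
  return AvoidMOVsShifterOperand;
}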
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index d439d1d7cb7e..13ec2087938a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -25,30 +25,33 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -181,7 +184,7 @@ namespace {
const size_t TagHeaderSize = 1 + 4;
Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitBytes(CurrentVendor);
Streamer.EmitIntValue(0, 1); // '\0'
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
@@ -191,14 +194,14 @@ namespace {
// emit each field as its type (ULEB or String)
for (unsigned int i=0; i<Contents.size(); ++i) {
AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag, 0);
+ Streamer.EmitULEB128IntValue(item.Tag);
switch (item.Type) {
default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper(), 0);
+ Streamer.EmitBytes(item.StringValue.upper());
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
@@ -339,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ if (ARM::GPRPairRegClass.contains(Reg)) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ Reg = TRI->getSubReg(Reg, ARM::gsub_0);
+ }
O << ARMInstPrinter::getRegisterName(Reg);
break;
}
@@ -398,7 +406,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
}
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -527,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
- const TargetRegisterClass &RC = ARM::GPRRegClass;
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
-
- unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
- RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
-
- unsigned Reg = RC.getRegister(RegIdx);
+ unsigned Reg = MO.getReg();
+ if (!ARM::GPRPairRegClass.contains(Reg))
+ return false;
+ Reg = TRI->getSubReg(Reg, ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
@@ -656,7 +662,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -666,7 +672,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -684,7 +690,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -698,6 +704,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -1051,12 +1062,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
- MCInst BrInst;
- BrInst.setOpcode(ARM::t2B);
- BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
- BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- BrInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(BrInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
continue;
}
// Otherwise it's an offset from the dispatch instruction. Construct an
@@ -1100,18 +1109,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps-2, OS);
}
-static void populateADROperands(MCInst &Inst, unsigned Dest,
- const MCSymbol *Label,
- unsigned pred, unsigned ccreg,
- MCContext &Ctx) {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
- Inst.addOperand(MCOperand::CreateReg(Dest));
- Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- // Add predicate operands.
- Inst.addOperand(MCOperand::CreateImm(pred));
- Inst.addOperand(MCOperand::CreateReg(ccreg));
-}
-
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
@@ -1288,129 +1285,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
// FIXME: Need to also handle globals and externals
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetCPISymbol(MI->getOperand(1).getIndex()),
- MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
return;
}
case ARM::LEApcrelJT:
case ARM::tLEApcrelJT:
case ARM::t2LEApcrelJT: {
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
- MI->getOperand(2).getImm()),
- MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
case ARM::BX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case ARM::tBX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::BMOVPCRX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::Bcc);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ .addReg(0));
+
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::MOVi16_ga_pcrel:
@@ -1498,15 +1470,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::tADDhirr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::PICADD: {
@@ -1521,17 +1491,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::ADDrr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- // Add 's' bit operand (always reg0 for this)
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
return;
}
case ARM::PICSTR:
@@ -1567,16 +1535,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
}
- MCInst LdStInst;
- LdStInst.setOpcode(Opcode);
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- LdStInst.addOperand(MCOperand::CreateImm(0));
- // Add predicate operands.
- LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- OutStreamer.EmitInstruction(LdStInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
@@ -1606,29 +1572,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
-    MCInst TmpInst;
-    TmpInst.setOpcode(ARM::t2TBB);
-    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    // Add predicate operands.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.EmitInstruction(TmpInst);
+    OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB)
+      .addReg(ARM::PC)
+      .addReg(MI->getOperand(0).getReg())
+      // Add predicate operands.
+      .addImm(ARMCC::AL)
+      .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
// Make sure the next instruction is 2-byte aligned.
@@ -1637,15 +1600,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
-    MCInst TmpInst;
-    TmpInst.setOpcode(ARM::t2TBH);
-    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    // Add predicate operands.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.EmitInstruction(TmpInst);
+    OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH)
+      .addReg(ARM::PC)
+      .addReg(MI->getOperand(0).getReg())
+      // Add predicate operands.
+      .addImm(ARMCC::AL)
+      .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
@@ -1705,17 +1666,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::BR_JTadd: {
// Lower and emit the instruction itself, then the jump table following it.
// add pc, target, idx
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDrr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- // Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
// Output the data for the jump table itself
EmitJumpTable(MI);
@@ -1733,6 +1692,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
break;
}
+ case ARM::TRAPNaCl: {
+    // .long 0xe7fedef0 @ trap
+ uint32_t Val = 0xe7fedef0UL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
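The NaCl trap has no mnemonic that every assembler accepts, so the case above
bypasses instruction encoding entirely and writes the bytes as data.
EmitIntValue(Value, Size) emits Size bytes into the current section using the
target's endianness; AddComment only annotates the textual assembly output.
A sketch of the idiom:

    // Emit a fixed instruction encoding as raw section data.
    OutStreamer.AddComment("trap");            // shows up as "@ trap" in .s
    OutStreamer.EmitIntValue(0xe7fedef0UL, 4); // 4 bytes, target endianness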
case ARM::tTRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
@@ -1759,75 +1725,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = GetARMSJLJEHLabel();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tADDi3);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
// 's' bit operand
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateImm(7));
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tSTRi);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
// The offset immediate is #4. The operand value is scaled by 4 for the
// tSTR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tB);
- TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
OutStreamer.EmitLabel(Label);
return;
}
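The scaled-offset comments above are easy to misread: tSTRi/tLDRi immediates
count words, not bytes, so a byte offset of #4 is encoded as 1. A hypothetical
helper (not part of this patch) that makes the scaling explicit:

    // Hypothetical: convert a byte offset into the word-granular immediate
    // expected by tSTRi/tLDRi.
    static int64_t thumbWordOffset(int64_t ByteOffset) {
      assert(ByteOffset % 4 == 0 && "tSTRi/tLDRi offsets must be word-aligned");
      return ByteOffset / 4; // #4 -> 1, #8 -> 2
    }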
@@ -1843,69 +1791,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::STRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::Int_eh_sjlj_longjmp: {
@@ -1915,48 +1847,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
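The three LDRi12 offsets spell out the buffer layout the SjLj longjmp
expansion assumes. Written as a struct purely for illustration (this type
does not exist in the source):

    // Hypothetical view of the slots loaded above:
    struct SjLjBufHead {
      void *FramePtr;   // [src, #0] -> r7
      void *ResumeAddr; // [src, #4] -> scratch, then "bx scratch"
      void *StackPtr;   // [src, #8] -> sp
    };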
case ARM::tInt_eh_sjlj_longjmp: {
@@ -1967,60 +1886,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
// The offset immediate is #8. The operand value is scaled by 4 for the
// tLDR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(2));
+ .addImm(2)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index c875b2cbdffe..c945e4f28699 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
+//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// ARM Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef ARMASMPRINTER_H
#define ARMASMPRINTER_H
@@ -54,7 +50,7 @@ public:
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "ARM Assembly Printer";
+ return "ARM Assembly / Object Emitter";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
@@ -121,7 +117,7 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
- MCSymbol *GetARMSJLJEHLabel(void) const;
+ MCSymbol *GetARMSJLJEHLabel() const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 3c7bb24f42f8..9e68ff44890e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,9 +18,7 @@
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,12 +27,14 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"
@@ -106,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
const InstrItineraryData *II = TM->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -115,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
return (ScheduleHazardRecognizer *)
new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
- return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *
@@ -464,8 +464,9 @@ PredicateInstruction(MachineInstr *MI,
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
- MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
- MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
return true;
}
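The hunk above relies on the two-argument MachineInstrBuilder(MF, MI)
constructor, which wraps an already-created instruction so further operands
can be appended through the builder interface; the MachineFunction argument
is needed because operand storage is allocated from it. The idiom, equivalent
to the removed addOperand calls:

    // Append predicate operands to an existing instruction (sketch).
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addImm(Pred[0].getImm())  // condition code
       .addReg(Pred[1].getReg()); // flags register (0 if none)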
@@ -1124,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -1154,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
@@ -1165,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MachineInstrBuilder(MI));
+ AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI->getOperand(1).setIsUndef();
- MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+ MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
@@ -1269,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB,
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
- MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
switch(Orig->getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
@@ -1373,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
 /// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
@@ -1447,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
@@ -1598,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return NULL;
- MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
return NULL;
// After swapping the MOVCC operands, also invert the condition.
@@ -1607,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1710,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// same register as operand 0.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
- NewMI->addOperand(FalseReg);
+ NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// The caller will erase MI, but not DefMI.
@@ -2711,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
if (Subtarget.isSwift()) {
- // rdar://8402126
int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
switch (Opc) {
default: break;
@@ -3321,8 +3328,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ if (MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
--Latency;
}
return Latency;
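This hunk migrates from the old per-function attribute bitset to the
AttributeSet interface, where function-level attributes are queried at the
pseudo-index AttributeSet::FunctionIndex. The new form, as a standalone
sketch:

    // Query a function-level attribute under the AttributeSet API.
    bool OptSize = MF->getFunction()->getAttributes()
                       .hasAttribute(AttributeSet::FunctionIndex,
                                     Attribute::OptimizeForSize);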
@@ -3726,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // A9-like cores are particularly picky about mixing the two and want these
+  // The Cortex-A9 is particularly picky about mixing the two and wants these
// converted.
- if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
@@ -3813,7 +3821,7 @@ void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
- MachineInstrBuilder MIB(MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
switch (MI->getOpcode()) {
default:
@@ -4015,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
-
-
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- // Only Swift has partial register update problems.
- if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
return 0;
assert(TRI && "Need TRI instance");
@@ -4038,7 +4044,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
case ARM::VLDRS:
case ARM::FCONSTS:
case ARM::VMOVSR:
- // rdar://problem/8791586
case ARM::VMOVv8i8:
case ARM::VMOVv4i16:
case ARM::VMOVv2i32:
@@ -4049,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
// Explicitly reads the dependency.
case ARM::VLD1LNd32:
- UseOp = 1;
+ UseOp = 3;
break;
default:
return 0;
@@ -4118,3 +4123,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+ // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+ ((ShImm == 1 || ShImm == 2) &&
+ ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+ return true;
+
+ return false;
+}
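Note that isSwiftFastImmShift reads MI->getOperand(3) unconditionally, so it
is only meaningful for instruction forms whose shifter operand sits at index
3. A hypothetical caller in a latency heuristic (not part of this patch):

    // Hypothetical: charge one cycle less when Swift handles the shifted
    // operand in its fast path.
    unsigned Latency = BaseLatency;
    if (TII->isSwiftFastImmShift(MI))
      --Latency;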
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 6f38e35124eb..7c107bb41951 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,10 +15,10 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
@@ -314,6 +314,10 @@ public:
bool canCauseFpMLxStall(unsigned Opcode) const {
return MLxHazardOpcodes.count(Opcode);
}
+
+  /// Returns true if the instruction has a shift-by-immediate operand that
+  /// Swift can execute one cycle faster than other shift amounts.
+ bool isSwiftFastImmShift(const MachineInstr *MI) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e5b300fc7792..b6b27f849a23 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -18,44 +18,34 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
#define GET_REGINFO_TARGET_DESC
#include "ARMGenRegisterInfo.inc"
using namespace llvm;
-static cl::opt<bool>
-ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
- cl::desc("Force use of virtual base registers for stack load/store"));
-static cl::opt<bool>
-EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
- cl::desc("Enable pre-regalloc stack frame index allocation"));
-static cl::opt<bool>
-EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -173,154 +163,63 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-/// getRawAllocationOrder - Returns the register allocation order for a
-/// specified register class with a target-dependent hint.
-ArrayRef<uint16_t>
-ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- // Alternative register allocation orders when favoring even / odd registers
- // of register pairs.
-
- // No FP, R9 is available.
- static const uint16_t GPREven1[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd1[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R7, R9 is available.
- static const uint16_t GPREven2[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd2[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R11, R9 is available.
- static const uint16_t GPREven3[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9
- };
- static const uint16_t GPROdd3[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
- ARM::R8
- };
-
- // No FP, R9 is not available.
- static const uint16_t GPREven4[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd4[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R7, R9 is not available.
- static const uint16_t GPREven5[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd5[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R11, R9 is not available.
- static const uint16_t GPREven6[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
- };
- static const uint16_t GPROdd6[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
- };
-
- // We only support even/odd hints for GPR and rGPR.
- if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
- return RC->getRawAllocationOrder(MF);
-
- if (HintType == ARMRI::RegPairEven) {
- if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven1);
- else
- return makeArrayRef(GPREven4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven2);
- else
- return makeArrayRef(GPREven5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven3);
- else
- return makeArrayRef(GPREven6);
- }
- } else if (HintType == ARMRI::RegPairOdd) {
- if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd1);
- else
- return makeArrayRef(GPROdd4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd2);
- else
- return makeArrayRef(GPROdd5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd3);
- else
- return makeArrayRef(GPROdd6);
- }
- }
- return RC->getRawAllocationOrder(MF);
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+ for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+ if (ARM::GPRPairRegClass.contains(*Supers))
+ return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ return 0;
}
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const {
- if (Reg == 0 || !isPhysicalRegister(Reg))
- return 0;
- if (Type == 0)
- return Reg;
- else if (Type == (unsigned)ARMRI::RegPairOdd)
- // Odd register.
- return getRegisterPairOdd(Reg, MF);
- else if (Type == (unsigned)ARMRI::RegPairEven)
- // Even register.
- return getRegisterPairEven(Reg, MF);
- return 0;
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ unsigned Odd;
+ switch (Hint.first) {
+ case ARMRI::RegPairEven:
+ Odd = 0;
+ break;
+ case ARMRI::RegPairOdd:
+ Odd = 1;
+ break;
+ default:
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ return;
+ }
+
+ // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+ // Check if the other part of the pair has already been assigned, and provide
+ // the paired register as the first hint.
+ unsigned PairedPhys = 0;
+ if (VRM && VRM->hasPhys(Hint.second)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
+ if (PairedPhys && MRI.isReserved(PairedPhys))
+ PairedPhys = 0;
+ }
+
+ // First prefer the paired physreg.
+ if (PairedPhys &&
+ std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ Hints.push_back(PairedPhys);
+
+ // Then prefer even or odd registers.
+ for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+ unsigned Reg = Order[I];
+ if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+ continue;
+ // Don't provide hints that are paired to a reserved register.
+ unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ if (!Paired || MRI.isReserved(Paired))
+ continue;
+ Hints.push_back(Reg);
+ }
}
void
@@ -371,9 +270,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!EnableBasePointer)
- return false;
-
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
@@ -419,8 +315,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// pointer adjustments around calls.
if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
- if (!EnableBasePointer)
- return false;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
return MRI->canReserveReg(BasePtr);
@@ -433,7 +327,8 @@ needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -464,114 +359,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
}
-unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
- const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R1: return ARM::R0;
- case ARM::R3: return ARM::R2;
- case ARM::R5: return ARM::R4;
- case ARM::R7:
- return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
- ? 0 : ARM::R6;
- case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 :ARM::R8;
- case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1: return ARM::S0;
- case ARM::S3: return ARM::S2;
- case ARM::S5: return ARM::S4;
- case ARM::S7: return ARM::S6;
- case ARM::S9: return ARM::S8;
- case ARM::S11: return ARM::S10;
- case ARM::S13: return ARM::S12;
- case ARM::S15: return ARM::S14;
- case ARM::S17: return ARM::S16;
- case ARM::S19: return ARM::S18;
- case ARM::S21: return ARM::S20;
- case ARM::S23: return ARM::S22;
- case ARM::S25: return ARM::S24;
- case ARM::S27: return ARM::S26;
- case ARM::S29: return ARM::S28;
- case ARM::S31: return ARM::S30;
-
- case ARM::D1: return ARM::D0;
- case ARM::D3: return ARM::D2;
- case ARM::D5: return ARM::D4;
- case ARM::D7: return ARM::D6;
- case ARM::D9: return ARM::D8;
- case ARM::D11: return ARM::D10;
- case ARM::D13: return ARM::D12;
- case ARM::D15: return ARM::D14;
- case ARM::D17: return ARM::D16;
- case ARM::D19: return ARM::D18;
- case ARM::D21: return ARM::D20;
- case ARM::D23: return ARM::D22;
- case ARM::D25: return ARM::D24;
- case ARM::D27: return ARM::D26;
- case ARM::D29: return ARM::D28;
- case ARM::D31: return ARM::D30;
- }
-
- return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
- const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R0: return ARM::R1;
- case ARM::R2: return ARM::R3;
- case ARM::R4: return ARM::R5;
- case ARM::R6:
- return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
- ? 0 : ARM::R7;
- case ARM::R8: return MRI.isReserved(ARM::R9) ? 0 :ARM::R9;
- case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0: return ARM::S1;
- case ARM::S2: return ARM::S3;
- case ARM::S4: return ARM::S5;
- case ARM::S6: return ARM::S7;
- case ARM::S8: return ARM::S9;
- case ARM::S10: return ARM::S11;
- case ARM::S12: return ARM::S13;
- case ARM::S14: return ARM::S15;
- case ARM::S16: return ARM::S17;
- case ARM::S18: return ARM::S19;
- case ARM::S20: return ARM::S21;
- case ARM::S22: return ARM::S23;
- case ARM::S24: return ARM::S25;
- case ARM::S26: return ARM::S27;
- case ARM::S28: return ARM::S29;
- case ARM::S30: return ARM::S31;
-
- case ARM::D0: return ARM::D1;
- case ARM::D2: return ARM::D3;
- case ARM::D4: return ARM::D5;
- case ARM::D6: return ARM::D7;
- case ARM::D8: return ARM::D9;
- case ARM::D10: return ARM::D11;
- case ARM::D12: return ARM::D13;
- case ARM::D14: return ARM::D15;
- case ARM::D16: return ARM::D17;
- case ARM::D18: return ARM::D19;
- case ARM::D20: return ARM::D21;
- case ARM::D22: return ARM::D23;
- case ARM::D24: return ARM::D25;
- case ARM::D26: return ARM::D27;
- case ARM::D28: return ARM::D29;
- case ARM::D30: return ARM::D31;
- }
-
- return 0;
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
@@ -611,65 +398,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
requiresVirtualBaseRegisters(const MachineFunction &MF) const {
- return EnableLocalStackAlloc;
-}
-
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
-}
-
-
-void ARMBaseRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- assert(!AFI->isThumb1OnlyFunction() &&
- "This eliminateCallFramePseudoInstr does not support Thumb1!");
- bool isARM = !AFI->isThumbFunction();
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- int PIdx = Old->findFirstPredOperandIdx();
- ARMCC::CondCodes Pred = (PIdx == -1)
- ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old->getOperand(2).getReg();
- emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
- } else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old->getOperand(3).getReg();
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
- }
- }
- }
- MBB.erase(I);
+ return true;
}
int64_t ARMBaseRegisterInfo::
@@ -750,8 +479,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
case ARM::VLDRS: case ARM::VLDRD:
case ARM::VSTRS: case ARM::VSTRD:
case ARM::tSTRspi: case ARM::tLDRspi:
- if (ForceAllBaseRegAlloc)
- return true;
break;
default:
return false;
@@ -933,8 +660,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- unsigned i = 0;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -943,13 +670,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
@@ -959,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
@@ -971,18 +692,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
if (!AFI->isThumbFunction())
- Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
else {
assert(AFI->isThumb2Function());
- Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
}
if (Done)
return;
@@ -1002,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
// Must be addrmode4/6.
- MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
if (!AFI->isThumbFunction())
@@ -1014,6 +735,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
// Update the original instruction to use the scratch register.
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+    MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,
+                                                 true);
}
}
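With the new FIOperandNum parameter, the operand scan that eliminateFrameIndex
used to repeat in each target now happens once on the caller's side. A sketch
of what the generic frame-lowering code does under this interface:

    // Locate the frame-index operand once, then hand its position to the
    // target hook.
    unsigned FIOperandNum = 0;
    while (!MI.getOperand(FIOperandNum).isFI()) {
      ++FIOperandNum;
      assert(FIOperandNum < MI.getNumOperands() &&
             "Instr doesn't have FrameIndex operand!");
    }
    TRI.eliminateFrameIndex(II, SPAdj, FIOperandNum, RS);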
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index e2bdd046db57..725033b7e573 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -111,12 +111,11 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const;
-
- unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const;
+ void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const;
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
@@ -169,17 +168,9 @@ public:
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
- virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
-private:
- unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
-
- unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 0bd1c3ee2feb..e6e8c3d5fac6 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -18,8 +18,8 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 6adbf4f27e6e..5e8e1739a984 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,16 +14,13 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
-#include "ARMConstantPoolValue.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -31,7 +28,10 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -371,12 +371,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
}
bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
+
+  assert((Target.getRelocationModel() == Reloc::Default ||
+          Target.getRelocationModel() == Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
- II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
- TD = MF.getTarget().getDataLayout();
+
+ JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
+ II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
+ TD = Target.getDataLayout();
+
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index a57368fdb5d8..4891609b336f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,23 +16,23 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
- // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ // This block no longer needs to be aligned.
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index fa3226e37eb9..4e703ec3c1a8 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,11 +13,11 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
@@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
bool AddCurrentAddress)
: ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
AddCurrentAddress),
- S(strdup(s)) {}
-
-ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
- free((void*)S);
-}
+ S(s) {}
ARMConstantPoolSymbol *
ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
@@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
-}
-
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
if (!APS) continue;
- if (CPV_streq(APS->S, S) && equals(APS))
+ if (APS->S == S && equals(APS))
return i;
}
}
@@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
- return ACPS && CPV_streq(ACPS->S, S) &&
- ARMConstantPoolValue::hasSameValue(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
}
void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(S);
+ ID.AddString(S);
ARMConstantPoolValue::addSelectionDAGCSEId(ID);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index ae531c4ea888..93812fe6bb37 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -161,19 +161,17 @@ public:
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
/// symbols.
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
- const char *S; // ExtSymbol being loaded.
+ const std::string S; // ExtSymbol being loaded.
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- ~ARMConstantPoolSymbol();
-
static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
unsigned ID, unsigned char PCAdj);
- const char *getSymbol() const { return S; }
+ const char *getSymbol() const { return S.c_str(); }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
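
The ARMConstantPoolValue hunks above replace ARMConstantPoolSymbol's manually
managed C string (strdup in the constructor, free in the destructor, plus the
hand-rolled CPV_streq helper) with a std::string member. A minimal standalone
sketch of the same ownership simplification, using hypothetical names:

    #include <string>

    class SymbolRef {
      const std::string S;   // owns its storage; no strdup/free needed
    public:
      explicit SymbolRef(const char *s) : S(s) {}
      // No user-written destructor: ~std::string releases S automatically.
      const char *getSymbol() const { return S.c_str(); }
      // std::string::operator== replaces the strcmp wrapper.
      bool sameSymbol(const SymbolRef &Other) const { return S == Other.S; }
    };
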
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 8c45e0b98d8e..beb843ca9aa8 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,10 +23,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6611862ca071..29fcd4009af3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,31 +16,31 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMTargetMachine.h"
-#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -146,6 +146,7 @@ class ARMFastISel : public FastISel {
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -178,23 +179,24 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
- bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
- unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
- unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
- unsigned ARMMaterializeInt(const Constant *C, EVT VT);
- unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
- unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
- unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ unsigned Alignment);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
- unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
// Call handling routines.
private:
@@ -220,7 +222,7 @@ class ARMFastISel : public FastISel {
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
- void AddLoadStoreOperands(EVT VT, Address &Addr,
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3);
};
@@ -486,7 +488,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
-unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::f64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -496,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
return MoveReg;
}
-unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::i64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -509,7 +511,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
-unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
const APFloat Val = CFP->getValueAPF();
bool is64bit = VT == MVT::f64;
@@ -553,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return false;
@@ -563,7 +565,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
const ConstantInt *CI = cast<ConstantInt>(C);
if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
- unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ImmReg)
.addImm(CI->getZExtValue()));
@@ -613,7 +617,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32) return 0;
@@ -716,10 +720,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
- if (!VT.isSimple()) return 0;
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return ARMMaterializeFP(CFP, VT);
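
The ARMFastISel hunks in this file repeat one idiom: query TargetLowering for
an EVT, bail out unless it is simple, then narrow to MVT before calling the
ARM helpers (whose signatures now take MVT). Condensed, the pattern used
above is:

    EVT CEVT = TLI.getValueType(C->getType(), true);
    if (!CEVT.isSimple())          // extended types fall back to SelectionDAG
      return 0;
    MVT VT = CEVT.getSimpleVT();   // only valid after the isSimple() check
    // ... VT is then passed to ARMMaterializeFP/Int/GV and friends.
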
@@ -895,12 +900,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
-
- assert(VT.isSimple() && "Non-simple types are invalid here!");
-
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
bool needsLowering = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
@@ -951,13 +953,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
}
}
-void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
- if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
Addr.Offset /= 4;
// Frame base works a bit differently. Handle it separately.
@@ -1000,14 +1001,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment, bool isZExt, bool allocReg) {
- assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
const TargetRegisterClass *RC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1:
@@ -1124,11 +1124,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
@@ -1402,8 +1402,9 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcVT = TLI.getValueType(Ty, true);
- if (!SrcVT.isSimple()) return false;
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
@@ -1440,7 +1441,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned CmpOpc;
bool isICmp = true;
bool needsExt = false;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
@@ -1592,7 +1593,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -1601,8 +1605,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
- EVT DestVT = MVT::i32;
- SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
/*isZExt*/!isSigned);
if (SrcReg == 0) return false;
}
@@ -1665,7 +1668,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
// Things need to be register sized for register moves.
if (VT != MVT::i32) return false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned CondReg = getRegForValue(I->getOperand(0));
if (CondReg == 0) return false;
@@ -1698,14 +1700,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
.addReg(CondReg).addImm(0));
unsigned MovCCOpc;
+ const TargetRegisterClass *RC;
if (!UseImm) {
+ RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
} else {
- if (!isNegativeImm) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ if (!isNegativeImm)
MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- } else {
+ else
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
- }
}
unsigned ResultReg = createResultReg(RC);
if (!UseImm)
@@ -1807,7 +1811,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT VT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
// We can get here in the case when we want to use NEON for our fp
// operations, but can't figure out how to. Just use the vfp instructions
@@ -1838,7 +1844,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
unsigned Op2 = getRegForValue(I->getOperand(1));
if (Op2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
@@ -2051,7 +2057,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
// For this move we copy into two registers and then move into the
// double fp reg we want.
- EVT DestVT = RVLocs[0].getValVT();
+ MVT DestVT = RVLocs[0].getValVT();
const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2066,7 +2072,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UpdateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
- EVT CopyVT = RVLocs[0].getValVT();
+ MVT CopyVT = RVLocs[0].getValVT();
// Special handling for extended integers.
if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
@@ -2094,11 +2100,13 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2125,8 +2133,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVVT = TLI.getValueType(RV->getType());
- EVT DestVT = VA.getValVT();
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
@@ -2151,13 +2161,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(RetOpc)));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc));
+ AddOptionalDefs(MIB);
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
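
With the hunk above, return-value registers are no longer flagged through
MRI.addLiveOut(); each location register is attached to the return instruction
itself as an implicit use. For an i32 returned in R0, the resulting Thumb2
instruction would look roughly like this (hypothetical debug dump):

    tBX_RET pred:14, pred:%noreg, %R0<imp-use>

The implicit use keeps R0 live up to the return.
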
@@ -2171,7 +2184,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
GlobalValue::ExternalLinkage, 0, Name);
- return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
+ EVT LCREVT = TLI.getValueType(GV->getType());
+ if (!LCREVT.isSimple()) return 0;
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2280,6 +2295,9 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall()) return false;
+
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
@@ -2328,16 +2346,16 @@ bool ARMFastISel::SelectCall(const Instruction *I,
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attributes::InReg) ||
- CS.paramHasAttr(AttrInd, Attributes::StructRet) ||
- CS.paramHasAttr(AttrInd, Attributes::Nest) ||
- CS.paramHasAttr(AttrInd, Attributes::ByVal))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2419,21 +2437,29 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
}
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
- uint64_t Len) {
+ uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
if (!ARMIsMemCpySmall(Len))
return false;
- // We don't care about alignment here since we just emit integer accesses.
while (Len) {
MVT VT;
- if (Len >= 4)
- VT = MVT::i32;
- else if (Len >= 2)
- VT = MVT::i16;
- else {
- assert(Len == 1);
- VT = MVT::i8;
+ if (!Alignment || Alignment >= 4) {
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+        assert(Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+ } else {
+ // Bound based on alignment.
+ if (Len >= 2 && Alignment == 2)
+ VT = MVT::i16;
+ else {
+ VT = MVT::i8;
+ }
}
bool RV;
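
The new Alignment parameter bounds the widest access the inline memcpy
expansion may emit. A standalone model of the chunk-size choice (hypothetical
helper, same logic as the loop above):

    #include <cstdint>

    // Access width in bytes for the next chunk of an inlined memcpy.
    unsigned chunkSize(uint64_t Len, unsigned Alignment) {
      if (!Alignment || Alignment >= 4)           // unknown or word-aligned
        return Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
      if (Alignment == 2 && Len >= 2)             // halfword-aligned
        return 2;
      return 1;                                   // byte copies only
    }

    // e.g. Len = 7: alignment >= 4 -> 4+2+1; alignment 2 -> 2+2+2+1;
    // alignment 1 -> seven byte loads/stores.
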
@@ -2512,7 +2538,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
!ARMComputeAddress(MTI.getRawSource(), Src))
return false;
- if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ unsigned Alignment = MTI.getAlignment();
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
@@ -2541,7 +2568,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
+ Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
return true;
}
}
@@ -2570,18 +2598,19 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
unsigned Opc;
bool isBoolZext = false;
- if (!SrcVT.isSimple()) return 0;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
@@ -2589,6 +2618,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i8:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
@@ -2596,6 +2626,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i1:
if (isZExt) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
@@ -2603,7 +2634,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
return 0;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
@@ -2622,14 +2653,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
-
bool isZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
@@ -2809,7 +2844,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
- unsigned Align, EVT VT) {
+ unsigned Align, MVT VT) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolConstant *CPV =
ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
@@ -2849,6 +2884,80 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
return DestReg2;
}
+bool ARMFastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::Fast:
+ case CallingConv::C:
+ case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ break;
+ }
+
+  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
+ // which are passed in r0 - r3.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 4)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ static const uint16_t GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ Idx = 0;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ unsigned SrcReg = GPRArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
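
For reference, the shapes this fast path accepts or rejects (hypothetical
signatures; a rejected function simply falls back to SelectionDAG lowering):

    int add3(int a, int b, int c);                // accepted: a->r0, b->r1, c->r2
    int five(int a, int b, int c, int d, int e);  // rejected: fifth arg is on the stack
    long long wide(long long x);                  // rejected: i64 is not a handled type
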
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 9392497fd07d..7a02adf24633 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,17 +15,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -120,13 +119,14 @@ static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
}
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -696,7 +696,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet) {
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
MI = MIB;
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1153,7 +1101,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
@@ -1234,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1329,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@@ -1418,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
@@ -1430,3 +1380,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
AFI->setLRIsSpilledForFarJump(true);
}
}
+
+void ARMFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
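
The only non-mechanical step in eliminateCallFramePseudoInstr above is rounding
the outgoing-argument area up to the stack alignment. A minimal sketch of that
rounding:

    // Round Amount up to the next multiple of Align.
    unsigned roundUp(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }
    // e.g. roundUp(10, 8) == 16; roundUp(16, 8) == 16.
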
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index a1c2b93562c9..efa255a5574a 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -70,6 +70,11 @@ public:
unsigned LdrOpc, bool isVarArg, bool NoGap,
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
+
+ virtual void eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index efd6d2b8399e..2c51de23f7dc 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -16,24 +16,25 @@
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -78,6 +79,8 @@ public:
return "ARM Instruction Selection";
}
+ virtual void PreprocessISelDAG();
+
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
@@ -255,6 +258,8 @@ private:
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
+ SDNode *SelectInlineAsm(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -265,15 +270,16 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- // Form pairs of consecutive S, D, or Q registers.
- SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
// Form sequences of 4 consecutive S, D, or Q registers.
- SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
+                              SDValue V3);
+  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
+                              SDValue V3);
+  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
+                              SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
@@ -326,6 +332,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale,
return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+    SDNode *N = I++; // Advance iterator here to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+  // of trailing zeros of c2. The left shift would be folded as a shifter
+ // operand of 'add' and the 'and' and 'srl' would become a bits extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = CountTrailingZeros_32(And_imm);
+ if (TZ != 1 && TZ != 2)
+      // Be conservative here. Shifter operands aren't always free; e.g. on
+      // Swift, a left shift by 1 or 2 as a shifter operand is free, but
+      // other shift amounts are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1, CurDAG->getConstant(TZ, MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
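
A worked instance of the PreprocessISelDAG rewrite above, taking c1 = 14 and
c2 = 1020 (binary 1111111100, so tz = 2 and c2 >> tz = 255):

    // before: (add X1, (and (srl X2, 14), 1020))
    // after:  (add X1, (shl (and (srl X2, 16), 255), 2))
    //
    // (and (srl X2, 16), 255) now matches UBFX (lsb 16, width 8), and the
    // shl-by-2 folds into the add as a free shifter operand, giving e.g.:
    //   ubfx  r3, r1, #16, #8
    //   ldr.w r3, [r0, r3, lsl #2]
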
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
@@ -1444,9 +1531,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairSRegs - Form a D register from a pair of S registers.
-///
-SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
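
createGPRPairNode builds an untyped REG_SEQUENCE so that the register
allocator, rather than instruction selection, picks the even/odd GPR pair. A
sketch of how the STREXD hunk later in this file uses it:

    // Combine the two i32 halves into a single MVT::Untyped GPRPair operand.
    SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0);
    Ops.push_back(Pair);   // one operand where strexd previously took two
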
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
@@ -1456,9 +1553,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairDRegs - Form a quad register from a pair of D registers.
-///
-SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
@@ -1467,9 +1563,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
-///
-SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
@@ -1478,9 +1573,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// QuadSRegs - Form 4 consecutive S registers.
-///
-SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
@@ -1494,9 +1588,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadDRegs - Form 4 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
@@ -1509,9 +1602,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadQRegs - Form 4 consecutive Q registers.
-///
-SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
@@ -1784,7 +1876,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
- SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
@@ -1792,13 +1884,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(Vec0Idx + 3);
- SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(Vec0Idx);
SDValue Q1 = N->getOperand(Vec0Idx + 1);
- SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
}
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
@@ -1840,7 +1932,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
- SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers. This is always an updating store, so that it
// provides the address to the second store for the odd subregs.
@@ -1950,18 +2042,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
- SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else
- SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
if (is64BitVector)
- SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
else
- SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
@@ -2087,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V0 = N->getOperand(FirstTblReg + 0);
SDValue V1 = N->getOperand(FirstTblReg + 1);
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
// If it's a vtbl3, form a quad D-register and leave the last part as
@@ -2095,7 +2187,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
SmallVector<SDValue, 6> Ops;
@@ -2113,10 +2205,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
if (!Subtarget->hasV6T2Ops())
return NULL;
- unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
-
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
if (N->getOpcode() == ISD::AND) {
@@ -2134,7 +2226,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// Note: The width operand is encoded as width-1.
unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
+
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ }
+
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
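
An example of the new top-bits shortcut: extracting bits [24, 31] of an i32
gives LSB = 24 and Width = 7 (the width operand is encoded as width-1), so
LSB + Width + 1 == 32 and a single shift replaces the bitfield extract
(hypothetical registers):

    //   lsr r0, r1, #24    @ unsigned extract of the top byte
    //   asr r0, r1, #24    @ signed extract of the top byte
    // versus the general form: ubfx/sbfx r0, r1, #24, #8
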
@@ -2411,7 +2525,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
@@ -2441,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case ISD::XOR: {
// Select special operations if XOR node forms integer ABS pattern
SDNode *ResNode = SelectABSOp(N);
@@ -2790,13 +2910,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
- return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
- return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
- return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
@@ -3009,17 +3129,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Chain = N->getOperand(0);
- unsigned NewOpc = ARM::LDREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2LDREXD;
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+    unsigned NewOpc = isThumb ? ARM::t2LDREXD : ARM::LDREXD;
// arm_ldrexd returns a i64 value in {i32, i32}
std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::i32);
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
@@ -3032,30 +3154,33 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode ldrexd to always
- // use the pair [R0, R1] to hold the load result.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
- SDValue(Ld, 0), SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
- SDValue(Ld, 1), Chain.getValue(1));
-
// Remap uses.
- SDValue Glue = Chain.getValue(1);
+ SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R0, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+        Result = SDValue(ResNode, 0);
+ }
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R1, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+        Result = SDValue(ResNode, 0);
+ }
ReplaceUses(SDValue(N, 1), Result);
}
-
- ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ ReplaceUses(SDValue(N, 2), OutChain);
return NULL;
}
@@ -3066,38 +3191,25 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode strexd to always
- // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
- SDValue(0, 0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
-
- SDValue Glue = Chain.getValue(1);
- Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R2, MVT::i32, Glue);
- Glue = Val0.getValue(1);
- Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R3, MVT::i32, Glue);
-
// Store exclusive double return a i32 value which is the return status
// of the issued store.
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = { MVT::i32, MVT::Other };
- // place arguments in the right order
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
- Ops.push_back(Val0);
- Ops.push_back(Val1);
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- unsigned NewOpc = ARM::STREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2STREXD;
+ unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
Ops.size());
@@ -3295,7 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
@@ -3325,11 +3437,152 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectAtomic64(N, ARM::ATOMSWAP6432);
case ARMISD::ATOMCMPXCHG64_DAG:
return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+
+ case ARMISD::ATOMMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMIN6432);
+ case ARMISD::ATOMUMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMIN6432);
+ case ARMISD::ATOMMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMAX6432);
+ case ARMISD::ATOMUMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMAX6432);
}
return SelectCode(N);
}
+SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(
+ N->getOperand(InlineAsm::Op_AsmString));
+ StringRef AsmString = StringRef(S->getSymbol());
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
+ // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
+ // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
+ // respectively. Since there is no constraint to explicitly specify a
+ // reg pair, we search for the %H operand inside the asm string. If it is
+ // found, the transformation below enforces a GPRPair reg class for "%r"
+ // for 64-bit data.
+ if (AsmString.find(":H}") == StringRef::npos)
+ return NULL;
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Glue = N->getOperand(NumOps-1);
+
+ // Glue node will be appended late.
+ for (unsigned i = 0; i < NumOps - 1; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag);
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2)
+ continue;
+
+ assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ GU = T1.getNode();
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if (PairedReg.getNode()) {
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
+ Flag, MVT::i32);
+ // Add the new register node.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the original two GPRs.
+ i += 2;
+ }
+ }
+
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return NULL;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
+ AsmNodeOperands.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+
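The ":H}" probe above keys off the ${N:H} operand modifier in the lowered asm string (e.g. "ldrexd $0, ${0:H}, [$1]"). A typical source-level trigger, shown only as an illustration, is 64-bit inline asm such as:

    unsigned long long v;
    /* %0 and %H0 name the low and high words of v; in ARM mode ldrexd
       needs an even/odd register pair, which the GPRPair rewrite above
       enforces. */
    __asm__ volatile("ldrexd %0, %H0, [%1]" : "=&r"(v) : "r"(p) : "memory");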
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ff99b04078e8..bb26090d2d8d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,14 +23,8 @@
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,14 +34,20 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -504,6 +504,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
@@ -515,8 +516,29 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+ // Mark v2f32 intrinsics.
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
@@ -539,6 +561,33 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
+ // NEON does not have single instruction CTPOP for vectors with element
+ // types wider than 8-bits. However, custom lowering can leverage the
+ // v8i8/v16i8 vcnt instruction.
+ setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -688,7 +737,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
@@ -762,6 +815,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
@@ -814,18 +869,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
- maxStoresPerMemset = 16;
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- benefitFromCodePlacementOpt = true;
-
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -841,10 +897,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
// Use DPR as representative register class for all floating point
@@ -1024,7 +1080,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -1557,7 +1613,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
@@ -1594,8 +1650,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1875,6 +1931,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return true;
}
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+ isVarArg));
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1893,15 +1960,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -1930,10 +1991,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
@@ -1946,6 +2009,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
@@ -1955,15 +2019,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- SDValue result;
+ // Update chain and glue.
+ RetOps[0] = Chain;
if (Flag.getNode())
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else // Return Void
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
- return result;
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2214,8 +2279,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (RelocM == Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV,
@@ -2259,8 +2323,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed. Also
// update ARMFastISel::ARMMaterializeGV.
@@ -2288,6 +2350,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
} else {
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
@@ -2368,7 +2431,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2543,7 +2605,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
// The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
@@ -2628,7 +2690,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
-
+
SmallVector<SDValue, 16> ArgValues;
int lastInsIndex = -1;
SDValue ArgValue;
@@ -2743,7 +2805,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
}
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
@@ -3379,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom-expand long vector extensions, where size(DestVec) > 2*size(SrcVec)
+/// and size(DestVec) > 128 bits.
+/// This is achieved by doing one extension from the SrcVec, splitting the
+/// result, extending those parts, and then concatenating them into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
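A worked instance, assuming a sext from v8i8 to v8i32 (so MidVT = v8i16, SplitVT = v4i16, ExtVT = v4i32), written in DAG pseudo-notation rather than literal IR:

    mid = sext v8i8 x to v8i16               ; the one extension from SrcVec
    lo  = extract_subvector mid, 0           ; v4i16
    hi  = extract_subvector mid, 4           ; v4i16
    res = concat_vectors (sext lo to v4i32),
                         (sext hi to v4i32)  ; v8i32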
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -3532,6 +3635,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element from operand, repeated. The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// [b0 b1 b2 b3 b4 b5 b6 b7]
+/// +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+ SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+ return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
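A concrete trace for one element, taking v0 = 0xFF01 and assuming w0 is the low byte under the [w0 w1] layout above (so w0 = 0x01, w1 = 0xFF):

    vcnt:        b0 = popcount(0x01) = 1,  b1 = popcount(0xFF) = 8
    vrev16+add:  k0 = b0 + b1 = 9           ; bit-count of the 16-bit element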
+
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand. We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
+/// v4i16:Extracted = [k0 k1 k2 k3 ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+ BitCounts, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+ }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand. The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input = [v0 v1 ] (vi: 32-bit elements)
+/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ]
+/// [k0 k1 k2 k3 ]
+/// N1 =+[k1 k0 k3 k2 ]
+/// [k0 k2 k1 k3 ]
+/// N2 =+[k1 k3 k0 k2 ]
+/// [k0 k2 k1 k3 ]
+/// Extended =+[k1 k3 k0 k2 ]
+/// [k0 k2 ]
+/// Extracted=+[k1 k3 ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+ SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+ SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+ SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ }
+}
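Continuing the numeric example: for a 32-bit element v0 = 0x00FF00FF, the 16-bit counts are k0 = k1 = 8, and the vrev32/add/vuzp sequence leaves k0 + k1 = 16 in v0's lane, which is indeed popcount(0x00FF00FF):

    Counts16 = [8 8 ...]  ; vrev32+add -> [16 16 ...]  ; extend -> 16 per lane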
+
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+ VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ if (VT.getVectorElementType() == MVT::i32)
+ return lowerCTPOP32BitElements(N, DAG);
+ else
+ return lowerCTPOP16BitElements(N, DAG);
+}
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -4153,6 +4364,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+/// \return true if this is a reverse operation on a vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -4247,7 +4473,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
-
+
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
@@ -4275,8 +4501,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
- // just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // just use VDUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the VDUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
@@ -4291,12 +4520,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, MVT::i32)),
DAG.getConstant(index, MVT::i32));
- } else {
+ } else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
- }
- }
- else
+ } else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
if (!usesOnlyOneValue) {
@@ -4328,7 +4555,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (usesOnlyOneValue) {
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
if (isConstant && Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
}
@@ -4548,7 +4775,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4652,6 +4880,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
&VTBLMask[0], 8));
}
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect a v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have <7, ..., 0, 15, ..., 8>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
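Tracing the v16i8 case on input bytes e0..e15, the two steps compose into the full reversal:

    VREV64:  [e0 ... e15]             ->  [e7 ... e0 | e15 ... e8]
    VEXT #8: [e7 ... e0 | e15 ... e8] ->  [e15 ... e8 | e7 ... e0]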
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4789,6 +5034,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())
@@ -4917,16 +5165,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
-/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
-/// load, or BUILD_VECTOR with extended elements, return the unextended value.
-static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
+ EVT NewVT;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Orig Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ NewVT = MVT::v2i32;
+ break;
+ case MVT::v4i8:
+ NewVT = MVT::v4i16;
+ break;
+ }
+ return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+}
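For example (a sketch, not a guaranteed lowering): multiplying two v4i8 values that were zero-extended to v4i32 re-extends them here only to v4i16, a 64-bit D register, so VMULL can perform the widening itself:

    ; (mul (zext v4i8 a to v4i32), (zext v4i8 b to v4i32))
    ;   -> VMULLu (zext a to v4i16), (zext b to v4i16)   ; one v4i32 result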
+
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+ SDValue NonExtendingLoad =
+ DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+ unsigned ExtOp = 0;
+ switch (LD->getExtensionType()) {
+ default: llvm_unreachable("Unexpected LoadExtType");
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
+ }
+ MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
+ MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
+ return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
+ MemType, ExtType, ExtOp);
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits we add an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
- return N->getOperand(0);
+ return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ return SkipLoadExtensionForVMULL(LD, DAG);
+
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4981,7 +5289,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
@@ -5024,9 +5333,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
SDValue Op0;
- SDValue Op1 = SkipExtension(N1, DAG);
+ SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
- Op0 = SkipExtension(N0, DAG);
+ Op0 = SkipExtensionForVMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
@@ -5041,8 +5350,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// vaddl q0, d4, d5
// vmovl q1, d6
// vmul q0, q0, q1
- SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
- SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
@@ -5328,6 +5637,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
@@ -5360,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
@@ -5388,6 +5702,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_CMP_SWAP:
ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
return;
+ case ISD::ATOMIC_LOAD_MIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_MAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
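These four cases complete the 64-bit atomic set; the kind of IR that now takes this path (operands hypothetical) is:

    ; Expanded to ARMISD::ATOMMAX64_DAG here, then emitted by
    ; EmitAtomicBinary64 as a ldrexd/compare/strexd loop with IsMinMax set.
    %old = atomicrmw max i64* %p, i64 %v seq_cst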
@@ -5727,7 +6053,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg) const {
+ bool NeedsCarry, bool IsCmpxchg,
+ bool IsMinMax, ARMCC::CondCodes CC) const {
// This also handles ATOMIC_SWAP, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -5751,21 +6078,17 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
- unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
- unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
-
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg) {
+ if (IsCmpxchg || IsMinMax)
contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ if (IsCmpxchg)
cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- }
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
MF->insert(It, loopMBB);
- if (IsCmpxchg) {
- MF->insert(It, contBB);
- MF->insert(It, cont2BB);
- }
+ if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
+ if (IsCmpxchg) MF->insert(It, cont2BB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -5792,22 +6115,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// cmp storesuccess, #0
// bne- loopMBB
// fallthrough --> exitMBB
- //
- // Note that the registers are explicitly specified because there is not any
- // way to force the register allocator to allocate a register pair.
- //
- // FIXME: The hardcoded registers are not necessary for Thumb2, but we
- // need to properly enforce the restriction that the two output registers
- // for ldrexd must be different.
BB = loopMBB;
+
// Load
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(ARM::R2, RegState::Define)
- .addReg(ARM::R3, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ unsigned StoreLo, StoreHi;
if (IsCmpxchg) {
// Add early exit
for (unsigned i = 0; i < 2; i++) {
@@ -5823,26 +6150,60 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
}
// Copy to physregs for strexd
- unsigned setlo = MI->getOperand(5).getReg();
- unsigned sethi = MI->getOperand(6).getReg();
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ StoreLo = MI->getOperand(5).getReg();
+ StoreHi = MI->getOperand(6).getReg();
} else if (Op1) {
// Perform binary operation
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
.addReg(destlo).addReg(vallo))
.addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
- .addReg(desthi).addReg(valhi)).addReg(0);
+ unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
+ .addReg(desthi).addReg(valhi))
+ .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
+
+ StoreLo = tmpRegLo;
+ StoreHi = tmpRegHi;
} else {
// Copy to physregs for strexd
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+ StoreLo = vallo;
+ StoreHi = valhi;
+ }
+ if (IsMinMax) {
+ // Compare and branch to exit block.
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(contBB);
+ BB = contBB;
+ StoreLo = vallo;
+ StoreHi = valhi;
}
// Store
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+ } else {
+ // Marshal a pair...
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(StoreLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+ .addReg(r1)
+ .addReg(StoreHi)
+ .addImm(ARM::gsub_1);
+
+ // ...and store it
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ .addReg(StorePair).addReg(ptr));
+ }
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
@@ -6043,6 +6404,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
unsigned UId = AFI->createJumpTableUId();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -6051,7 +6413,13 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DispatchBB->setIsLandingPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ unsigned trap_opcode;
+ if (Subtarget->isThumb())
+ trap_opcode = ARM::tTRAP;
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -6197,11 +6565,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ unsigned NewVReg6 = NewVReg5;
+ if (RelocM == Reloc::PIC_) {
+ NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+ }
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
@@ -6281,11 +6652,18 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg4)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ if (RelocM == Reloc::PIC_) {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
+ .addReg(NewVReg5, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
}
// Add the jump table entries as successors to the MBB.
@@ -6334,7 +6712,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DefRegs[OI->getReg()] = true;
}
- MachineInstrBuilder MIB(&*II);
+ MachineInstrBuilder MIB(*MF, &*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
@@ -6411,8 +6789,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat) &&
+ if (!MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -6840,6 +7219,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
+ case ARM::ATOMMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LT);
+ case ARM::ATOMMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::GE);
+ case ARM::ATOMUMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LO);
+ case ARM::ATOMUMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -9111,7 +9510,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
@@ -9120,15 +9519,27 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
return false;
case MVT::i8:
case MVT::i16:
- case MVT::i32:
+ case MVT::i32: {
// Unaligned access can use (for example) LRDB, LRDH, LDR
- return AllowsUnaligned;
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
case MVT::f64:
- case MVT::v2f64:
+ case MVT::v2f64: {
// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses
- return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
+ if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
}
}
@@ -9140,33 +9551,59 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
- Subtarget->hasNEON()) {
- if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
- return MVT::v4i32;
- } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
- return MVT::v2i32;
+ if ((!IsMemset || ZeroMemset) &&
+ Subtarget->hasNEON() &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ return MVT::f64;
}
}
// Lowering to i32/i16 if the size permits.
- if (Size >= 4) {
+ if (Size >= 4)
return MVT::i32;
- } else if (Size >= 2) {
+ else if (Size >= 2)
return MVT::i16;
- }
// Let the target-independent logic figure it out.
return MVT::Other;
}
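The practical effect, assuming NEON and no noimplicitfloat: a 32-byte copy whose endpoints are 16-byte aligned (or where unaligned v2f64 access is fast) can now lower to two 16-byte NEON chunks instead of eight i32 moves. An illustrative C trigger:

    #include <string.h>

    /* Sketch only: with the type selection above, this copy can be
       emitted as two vld1/vst1 q-register transfers (MVT::v2f64). */
    void copy32(void *dst, const void *src) {
      memcpy(dst, src, 32);
    }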
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
+ return true;
+ }
+
+ return false;
+}
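The claim being encoded: ARM's byte and halfword loads (LDRB/LDRH) already zero-extend into a full register, so a zext of a narrow load is free. A minimal C illustration:

    unsigned widen(const unsigned char *p) {
      /* LDRB writes the zero-extended byte into a 32-bit register, so
         the implicit zext here costs no extra instruction. */
      return *p;
    }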
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4eb3b2cb5150..9ee17f0781b9 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,11 +17,11 @@
#include "ARM.h"
#include "ARMSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
namespace llvm {
@@ -232,7 +232,11 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG
+ ATOMCMPXCHG64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMIN64_DAG,
+ ATOMMAX64_DAG,
+ ATOMUMAX64_DAG
};
}
@@ -248,7 +252,7 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
- virtual unsigned getJumpTableEncoding(void) const;
+ virtual unsigned getJumpTableEncoding() const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -281,15 +285,19 @@ namespace llvm {
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -358,7 +366,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -384,7 +392,7 @@ namespace llvm {
unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -495,6 +503,12 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
+
+ virtual bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -526,7 +540,9 @@ namespace llvm {
unsigned Op1,
unsigned Op2,
bool NeedsCarry = false,
- bool IsCmpxchg = false) const;
+ bool IsCmpxchg = false,
+ bool IsMinMax = false,
+ ARMCC::CondCodes CC = ARMCC::AL) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index a0b6f249a286..80f0ec74376a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index df2e55ed5c0e..11550c5ae678 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
SDNPVariadic]>;
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">,
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -417,6 +420,8 @@ def reglist : Operand<i32> {
let DecoderMethod = "DecodeRegListOperand";
}
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
@@ -1005,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1017,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1032,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1047,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1074,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1086,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1100,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1115,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1140,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
4, iir,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_imm:$shift))]>;
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_reg:$shift))]>;
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1169,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1194,7 +1214,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
PatFrag opnode, bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]> {
+ [(opnode GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
@@ -1207,7 +1228,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
}
def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
opc, "\t$Rn, $Rm",
- [(opnode GPR:$Rn, GPR:$Rm)]> {
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
+ Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = Commutable;
@@ -1223,7 +1245,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsi : AI1<opcod, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1239,7 +1262,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsr : AI1<opcod, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1321,7 +1345,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1333,7 +1358,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1348,7 +1374,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
(ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1364,7 +1391,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPRnopc:$Rd, CPSR,
(opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1387,7 +1415,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1398,7 +1427,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1411,7 +1441,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1425,7 +1456,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1622,6 +1654,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
GPR:$set1, GPR:$set2),
NoItinerary, []>;
+def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
}
def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
@@ -1748,11 +1792,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
let Inst{3-0} = opt;
}
-// A5.4 Permanently UNDEFINED instructions.
+/*
+ * A5.4 Permanently UNDEFINED instructions.
+ *
+ * For most targets use UDF #65006, for which the OS will generate SIGTRAP.
+ * Other UDF encodings generate SIGILL.
+ *
+ * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb.
+ * Encoding A1:
+ * 1110 0111 1111 iiii iiii iiii 1111 iiii
+ * Encoding T1:
+ * 1101 1110 iiii iiii
+ * It uses the following encoding:
+ * 1110 0111 1111 1110 1101 1110 1111 0000
+ * - In ARM: UDF #60896;
+ * - In Thumb: UDF #254 followed by a branch-to-self.
+ */
+let isBarrier = 1, isTerminator = 1 in
+def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,UseNaClTrap]> {
+ let Inst = 0xe7fedef0;
+}
let isBarrier = 1, isTerminator = 1 in
def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
"trap", [(trap)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM,DontUseNaClTrap]> {
let Inst = 0xe7ffdefe;
}
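The immediates quoted in the comment can be cross-checked mechanically. The standalone helper below extracts the encoding A1 imm16 field (imm12 in bits 19:8, imm4 in bits 3:0); it is a sketch for verification, not part of the patch:

#include <cassert>
#include <cstdint>

// Extract the UDF encoding A1 immediate from a 32-bit ARM instruction word.
static uint32_t udfA1Imm16(uint32_t Inst) {
  return (((Inst >> 8) & 0xFFF) << 4) | (Inst & 0xF);
}

int main() {
  assert(udfA1Imm16(0xe7fedef0) == 60896); // the NaCl trap (TRAPNaCl)
  assert(udfA1Imm16(0xe7ffdefe) == 65006); // the default trap (TRAP)
  return 0;
}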
@@ -1804,7 +1869,8 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
- MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<14> label;
let Inst{27-25} = 0b001;
@@ -2065,6 +2131,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
let Inst{24-23} = 0b11;
}
+def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>;
+def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>;
+def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>;
+def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>;
+def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>;
+
// Return From Exception
class RFEI<bit wb, string asm>
: XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
@@ -3816,28 +3894,33 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "clz", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+ Sched<[WriteALU]>;
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
[(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
- Requires<[IsARM, HasV6T2]>;
+ Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev", "\t$Rd, $Rm",
- [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev16", "\t$Rd, $Rm",
[(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
[(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
@@ -3849,7 +3932,8 @@ def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
(and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
@@ -3865,7 +3949,8 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
(and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -4229,8 +4314,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
}
@@ -4244,8 +4329,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3cf213cbffee..0411ac4e282a 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", NEONvceqz>;
@@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", NEONvclez>;
+}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", NEONvcltz>;
+}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
@@ -4877,12 +4882,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs", "f32",
- v4f32, v4f32, int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
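With VABSfd/VABSfq now selected from the generic fabs node, the two Pat<> lines above keep the ARM-specific intrinsic working. A sketch of IR that now reaches VABSfd through the generic path (IRBuilder usage assumed, not from the patch):

// Illustration only: emits llvm.fabs.v2f32, which the new patterns can
// select as VABSfd instead of requiring int_arm_neon_vabs.
llvm::Value *emitVecFabs(llvm::IRBuilder<> &B, llvm::Value *V) {
  llvm::Module *M = B.GetInsertBlock()->getParent()->getParent();
  llvm::Function *Fabs =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::fabs, V->getType());
  return B.CreateCall(Fabs, V);
}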
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
@@ -5737,6 +5745,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 002d64a2d039..c9d709eb5222 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
let ParserMatchClass = imm0_4095_neg_asmoperand;
}
-def imm0_255_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 255;
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
}], imm_neg_XFORM>;
def imm0_255_not : PatLeaf<(i32 imm), [{
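The renamed predicate now rejects zero, so only genuinely negative immediates are rewritten into SUB forms by the T2Pats in the following hunks. A standalone model of the check (note it keeps the original strict "< 255" bound, excluding 255 itself):

#include <cstdint>

// Toy model of imm1_255_neg: negate the zero-extended immediate and accept
// values 1..254, mirroring the PatLeaf body above.
static bool isImm1_255Neg(uint32_t Imm) {
  uint32_t Val = 0u - Imm;
  return Val > 0 && Val < 255;
}
// Example: for "add r0, #-1", Imm is 0xFFFFFFFF, Val becomes 1, and the
// pattern rewrites the add into "t2SUBri r0, #1".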
@@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// The AddedComplexity preferences the first variant over the others since
// it can be shrunk to a 16-bit wide encoding, while the others cannot.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
- (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
@@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
let AddedComplexity = 1 in
-def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
- (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
@@ -2314,13 +2315,15 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
/// changed to modify CPSR.
multiclass T2I_un_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ PatFrag opnode,
+ bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
// shifted imm
def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
opc, "\t$Rd, $imm",
[(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
+ let isMoveImm = MoveImm;
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -2354,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
- UnOpFrag<(not node:$Src)>, 1, 1>;
+ UnOpFrag<(not node:$Src)>, 1, 1, 1>;
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
@@ -3478,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
"srsia","\tsp, $mode", []>;
+
+def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>;
+
+def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
+
// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 254d8f6b7c7a..351a290e2aa0 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -17,12 +17,12 @@
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 792818442724..23a6a9b512f4 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -15,12 +15,12 @@
#define ARMJITINFO_H
#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
class ARMTargetMachine;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 0185289f3bd8..b7ac5d57c362 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -18,8 +18,12 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -27,19 +31,15 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
@@ -87,6 +87,53 @@ namespace {
MachineBasicBlock::iterator i)
: Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
+ class UnitRegsMap {
+ public:
+ UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
+ const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
+ DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
+ Cache.find(Reg);
+ if (found != Cache.end())
+ return found->second;
+ else
+ return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
+ .first->second;
+ }
+ private:
+ SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
+ SmallVector<unsigned, 4> Res;
+
+ const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
+ if (TRC == &ARM::QPRRegClass) {
+ if (Reg > ARM::Q7) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
+ return Res;
+ }
+
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
+
+ return Res;
+ }
+
+ if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+
+ return Res;
+ }
+
+ Res.push_back(Reg);
+
+ return Res;
+
+ }
+ const TargetRegisterInfo* TRI;
+ DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
+ };
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -128,6 +175,11 @@ namespace {
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I);
+ unsigned AddMemOp(MemOpQueue& MemOps,
+ const MemOpQueueEntry newEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At = -1U);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -865,7 +917,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
- if (isLd && MI->getOperand(0).getReg() == Base)
+ if (MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
@@ -1188,7 +1240,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddDeadKill = true;
}
// Never kill the base register in the first instruction.
- // <rdar://problem/11101911>
if (EvenReg == BaseReg)
EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
@@ -1214,12 +1265,103 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
}
+/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
+/// It adds store mem ops with a simple push_back/insert method,
+/// without any additional logic.
+/// For load operations it does the following:
+/// 1. Adds the new load operation into the MemOp collection at position "At".
+/// 2. Removes any "load" operations from MemOps that change the "Reg"
+///    register contents, prior to "At".
+/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
+/// UsedUnitRegs - set of unit-registers currently in use.
+/// At - position at which the entry would be added, and prior to which the
+///      clean-up should be made (for load operations).
+/// FIXME: The clean-up should also be made for store operations,
+/// but the memory address should be analyzed instead of unit registers.
+unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
+ const MemOpQueueEntry NewEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At) {
+ unsigned Cleaned = 0;
+
+ if (At == -1U) {
+ At = MemOps.size();
+ MemOps.push_back(NewEntry);
+ } else
+ MemOps.insert(&MemOps[At], NewEntry);
+
+ // FIXME:
+  // If the operation is not a load, leave it as is for now,
+  // so 0 overridden ops are cleaned in this case.
+ if (!NewEntry.MBBI->mayLoad())
+ return 0;
+
+ const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
+
+ bool FoundOverriddenLoads = false;
+
+ for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
+ if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
+ FoundOverriddenLoads = true;
+ break;
+ }
+
+ // If we detect that this register is used by load operations that are
+ // predecessors for the new one, remove them from MemOps then.
+ if (FoundOverriddenLoads) {
+ MemOpQueue UpdatedMemOps;
+
+ // Scan through MemOps entries.
+ for (unsigned i = 0; i != At; ++i) {
+ MemOpQueueEntry& MemOpEntry = MemOps[i];
+
+ // FIXME: Skip non-load operations by now.
+ if (!MemOpEntry.MBBI->mayLoad())
+ continue;
+
+ const SmallVector<unsigned, 4>& MemOpUnitRegs =
+ UnitRegsInfo[MemOpEntry.Reg];
+
+ // Lookup entry that loads contents into register used by new entry.
+ bool ReleaseThisEntry = false;
+ for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
+ if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
+ MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
+ ReleaseThisEntry = true;
+ ++Cleaned;
+ break;
+ }
+ }
+
+ if (ReleaseThisEntry) {
+        const SmallVector<unsigned, 4>& ReleasedRegs = UnitRegsInfo[MemOpEntry.Reg];
+        for (unsigned r = 0, er = ReleasedRegs.size(); r != er; ++r)
+          UsedUnitRegs.erase(ReleasedRegs[r]);
+ } else
+ UpdatedMemOps.push_back(MemOpEntry);
+ }
+
+ // Keep anything without changes after At position.
+ for (unsigned i = At, e = MemOps.size(); i != e; ++i)
+ UpdatedMemOps.push_back(MemOps[i]);
+
+ MemOps.swap(UpdatedMemOps);
+ }
+
+ UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
+
+ return Cleaned;
+}
+
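The overlap test at the heart of this clean-up can be modelled in isolation. The snippet below is an illustration with plain ints standing in for unit registers; the real code walks TRI sub-registers via UnitRegsMap:

#include <algorithm>
#include <vector>

// Q0 decomposes into {S0..S3}, modelled here as {0,1,2,3}; D0 into {S0,S1},
// modelled as {0,1}. An overlap means the earlier load gets dropped.
static bool unitsOverlap(const std::vector<int> &NewUnits,
                         const std::vector<int> &UsedUnits) {
  for (int U : NewUnits)
    if (std::find(UsedUnits.begin(), UsedUnits.end(), U) != UsedUnits.end())
      return true;
  return false;
}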
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
+ UnitRegsMap UnitRegsInfo(TRI);
+ SmallSet<unsigned, 4> UsedRegUnits;
unsigned CurrBase = 0;
int CurrOpc = -1;
unsigned CurrSize = 0;
@@ -1266,8 +1408,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
+
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
+ const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
+ UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
Advance = true;
} else {
if (Clobber) {
@@ -1279,20 +1424,24 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps,
+ MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
+ UnitRegsInfo, UsedRegUnits) != 0;
+ NumMemOps += 1 - OverridesCleaned;
Advance = true;
} else {
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
- I != E; ++I) {
- if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
+ for (unsigned I = 0; I != NumMemOps; ++I) {
+ if (Offset < MemOps[I].Offset) {
+ MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps, entry, UnitRegsInfo,
+ UsedRegUnits, I) != 0;
+ NumMemOps += 1 - OverridesCleaned;
+
Advance = true;
break;
- } else if (Offset == I->Offset) {
+ } else if (Offset == MemOps[I].Offset) {
// Collision! This can't be merged!
break;
}
@@ -1363,6 +1512,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrPredReg = 0;
if (NumMemOps) {
MemOps.clear();
+ UsedRegUnits.clear();
NumMemOps = 0;
}
@@ -1408,7 +1558,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(&*MBBI);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
MBB.erase(MBBI);
return true;
}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index e2ac9a466ed8..b6414832003d 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index c0ac04b6003c..88d96c0be8a7 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,10 +15,10 @@
#define ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 02196d06bfd3..2d088de96e27 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -6,6 +6,77 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3
+// | | uopc Rd, Rn, T0 - P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd after a minimum of three cycles after the result in Rm and Rs is available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : SchedRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of it).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
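The net effect of a ReadAdvance can be summarized with one line of arithmetic; a toy model (assumption: the scheduler clamps the result at zero):

#include <algorithm>

// Effective per-operand latency = write latency - read advance, clamped.
static int operandLatency(int WriteLatency, int ReadAdvance) {
  return std::max(0, WriteLatency - ReadAdvance);
}
// With the WriteRes latency of 4 and ReadAdvance of 3 from the example,
// operandLatency(4, 3) == 1, i.e. "one cycle after the result in Rn".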
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 404634fee989..9739ed20ce2e 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
@@ -1895,6 +1898,8 @@ def CortexA9Model : SchedMachineModel {
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
+let SchedModel = CortexA9Model in {
+
def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
def A9UnitAGU : ProcResource<1>;
@@ -1915,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>;
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
// Basic ALU.
-def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate.
-def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
// ALU with operand shifted by register.
-def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
// Multiplication
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
@@ -2000,13 +2005,6 @@ foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles
-// Define TII for use in SchedVariant Predicates.
-def : PredicateProlog<[{
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
- (void)TII;
-}]>;
-
// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {
@@ -2251,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[
// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
-def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
// Some ALU operations can read loaded integer values one cycle early.
-def A9ReadA : SchedReadAdvance<1,
+def A9ReadALU : SchedReadAdvance<1,
[A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
@@ -2276,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-let SchedModel = CortexA9Model in {
def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
-def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
-def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
-def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
-def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
-def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
-def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
-def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
-def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
// A9WriteHi ignored for MUL32.
def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
@@ -2368,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
IIC_iStore_m,
IIC_iStore_mu]>;
def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
-def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
@@ -2483,4 +2480,17 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+// FIXME: need to special-case AND, ORR, EOR, BIC because they don't read
+// advance. But our InstrInfo claims they do.
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index e9bc3e0f3955..7c6df410706e 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1078,8 +1078,67 @@ def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
let Itineraries = SwiftItineraries;
}
-// TODO: Add Swift processor and scheduler resources.
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // Generic resource requirements.
+ def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+ def SwiftWriteP01ThreeCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+ }
+
+ // 4.2.4 Arithmetic and Logical.
+ // ALU operation register shifted by immediate variant.
+ def SwiftWriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+ ]>;
+ def SwiftWriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftWriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftReadAdvanceALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+ SchedVar<NoSchedPred, [NoReadAdvance]>
+ ]>;
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC,EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+ def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+ def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ // 4.2.5 Integer comparison
+ def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+}
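IsFastImmShiftSwiftPred defers to a TII helper. Judging only by the "lsl #2, lsl #1, or lsr #1" comment above, its check plausibly has the shape sketched below; the real helper lives in ARMBaseInstrInfo and may differ:

// Hypothetical shape of the fast-immediate-shift test; illustration only.
static bool looksLikeSwiftFastImmShift(ARM_AM::ShiftOpc Opc, unsigned Amt) {
  return (Opc == ARM_AM::lsl && (Amt == 1 || Amt == 2)) ||
         (Opc == ARM_AM::lsr && Amt == 1);
}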
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index b33b3c915a6e..41a7e0c2c8a5 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index bcc9db4ae3e3..739300e4eff9 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,11 +12,14 @@
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -40,60 +43,88 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , HasV4TOps(false)
- , HasV5TOps(false)
- , HasV5TEOps(false)
- , HasV6Ops(false)
- , HasV6T2Ops(false)
- , HasV7Ops(false)
- , HasVFPv2(false)
- , HasVFPv3(false)
- , HasVFPv4(false)
- , HasNEON(false)
- , UseNEONForSinglePrecisionFP(false)
- , UseMulOps(UseFusedMulOps)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , InThumbMode(false)
- , HasThumb2(false)
- , IsMClass(false)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , SupportsTailCall(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasHardwareDivideInARM(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , HasRAS(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , Thumb2DSP(false)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
- // Determine default and user specified characteristics
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+ UnsafeFPMath = false;
+}
+
+void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+      !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
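These per-function strings are the ones a front end attaches to functions. For reference, a sketch of producing them (modern Function::addFnAttr spelling; CPU and feature values are hypothetical):

// Illustration: attaching the attributes resetSubtargetFeatures reads back.
static void tagFunction(llvm::Function &F) {
  F.addFnAttr("target-cpu", "cortex-a9");
  F.addFnAttr("target-features", "+neon,+vfp3");
}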
+
+void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty())
CPUString = "generic";
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
+ CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS;
+ ArchFS = ArchFS + "," + FS.str();
else
ArchFS = FS;
}
@@ -110,7 +141,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
+ if ((TargetTriple.getTriple().find("eabi") != std::string::npos) ||
+ (isTargetIOS() && isMClass()))
// FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
// Darwin-EABI conforms to AACPS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
@@ -133,6 +165,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8e6b6506022d..5b5ee6aeb865 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -15,9 +15,9 @@
#define ARMSUBTARGET_H
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -26,11 +26,12 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9, CortexA15, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -131,6 +132,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+ /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
+ /// movs with shifter operand (i.e. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
/// HasRAS - Some processors perform return stack prediction. CodeGen should
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRAS;
@@ -152,6 +157,12 @@ protected:
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
+ /// NaCl TRAP instruction is generated instead of the regular TRAP.
+ bool UseNaClTrap;
+
+  /// Target machine allows unsafe FP math (such as the use of NEON fp)
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -168,6 +179,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -182,7 +196,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -195,6 +209,12 @@ protected:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// \brief Reset the features for the ARM target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
void computeIssueWidth();
bool hasV4TOps() const { return HasV4TOps; }
@@ -204,12 +224,14 @@ protected:
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
+ bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool hasARMOps() const { return !NoARM; }
@@ -231,9 +253,11 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool useNaClTrap() const { return UseNaClTrap; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
@@ -243,7 +267,7 @@ protected:
bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NativeClient;
+ return TargetTriple.getOS() == Triple::NaCl;
}
bool isTargetELF() const { return !isTargetDarwin(); }
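
The new initializeEnvironment()/resetSubtargetFeatures() hooks let the subtarget be re-derived per function instead of being fixed once per module. A minimal standalone sketch of the reset-then-reparse pattern those hooks follow (the names and the toy feature parse are illustrative, not LLVM's):

#include <string>

// Toy stand-in for ARMSubtarget's cached feature flags.
struct SubtargetSketch {
  bool HasNEON;
  bool UseNaClTrap;
  std::string CPUString;

  // Mirrors initializeEnvironment(): restore every cached flag to its
  // default so the next feature parse starts from a clean slate.
  void initializeEnvironment() {
    HasNEON = false;
    UseNaClTrap = false;
  }

  // Mirrors resetSubtargetFeatures(StringRef, StringRef): reset, then
  // reparse. The real code calls the tblgen-generated
  // ParseSubtargetFeatures() instead of this toy substring check.
  void resetSubtargetFeatures(const std::string &CPU, const std::string &FS) {
    initializeEnvironment();
    CPUString = CPU;
    HasNEON = FS.find("+neon") != std::string::npos;
  }
};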
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b486d4fe2ef9..42c7d2c437e0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMFrameLowering.h"
#include "ARM.h"
-#include "llvm/PassManager.h"
+#include "ARMFrameLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -51,6 +56,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // First add the target-independent BasicTTI pass, then our ARM pass. This
+ // allows the ARM pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createARMTargetTransformInfoPass(this));
+}
+
+
void ARMTargetMachine::anchor() { }
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
@@ -70,8 +84,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"v128:64:128-v64:64:64-n32-S32")),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -103,8 +116,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
namespace {
@@ -157,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
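
With the TTI hook in place, a front end or tool registers the analysis chain once, and every later pass that queries TargetTransformInfo gets the ARM answers. A sketch of the call site, assuming the legacy PassManager API of this LLVM revision (the driver function is hypothetical):

#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Hypothetical driver: TM and M are assumed to be set up elsewhere.
void runWithTargetAnalyses(llvm::TargetMachine &TM, llvm::Module &M) {
  llvm::PassManager PM;
  // Adds BasicTTI first, then ARMTTI, so TTI queries resolve to the
  // ARM-specific implementation and fall back to the generic one.
  TM.addAnalysisPasses(PM);
  PM.run(M);
}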
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index ebdd5b4d64c9..d4caf5ca6e19 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,20 +14,19 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMJITInfo.h"
-#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -47,10 +46,17 @@ public:
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ // Implemented by derived classes
+ llvm_unreachable("getTargetLowering not implemented");
+ }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+ /// \brief Register ARM analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -66,8 +72,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -89,12 +93,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
@@ -112,8 +110,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -142,12 +138,6 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
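
The unreachable default above exists so that ARMBaseTargetMachine::addAnalysisPasses() can call getTargetLowering() through the base type while the actual ARMTargetLowering object lives in the derived ARM and Thumb machines. A standalone sketch of that hook pattern (the types are toy stand-ins):

#include <cstdlib>

struct BaseTM {
  virtual ~BaseTM() {}
  // Base provides an unreachable default, like llvm_unreachable(...) above.
  virtual const void *getTargetLowering() const { abort(); }
};

struct ConcreteTM : BaseTM {
  int TLInfo; // stands in for the ARMTargetLowering member
  virtual const void *getTargetLowering() const { return &TLInfo; }
};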
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3d85ca7d6995..dfdf6ab356a3 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,12 +9,14 @@
#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -38,3 +40,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ MCSymbolRefExpr::VK_ARM_TARGET2,
+ getContext());
+}
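
For context, a sketch of how the expression returned above is typically consumed: the streamer writes a 4-byte value whose VK_ARM_TARGET2 modifier the ELF object writer lowers to an R_ARM_TARGET2 relocation. The helper is hypothetical; the MC calls are assumed to match this revision's API:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"

// Hypothetical emission helper; Sym is the mangled type-info symbol.
void emitTTypeWord(llvm::MCStreamer &Streamer, const llvm::MCSymbol *Sym,
                   llvm::MCContext &Ctx) {
  const llvm::MCExpr *Ref = llvm::MCSymbolRefExpr::Create(
      Sym, llvm::MCSymbolRefExpr::VK_ARM_TARGET2, Ctx);
  Streamer.EmitValue(Ref, 4); // becomes an R_ARM_TARGET2 word
}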
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439d7..7f60727e5305 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -28,6 +28,11 @@ public:
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 000000000000..1019b972e957
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,458 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+ const ARMBaseTargetMachine *TM;
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ ARMTTI(const ARMBaseTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeARMTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 16;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+
+ unsigned getAddressComputationCost(Type *Val) const;
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
+ "ARM Target Transform Info", true, true, false)
+char ARMTTI::ID = 0;
+
+ImmutablePass *
+llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
+ return new ARMTTI(TM);
+}
+
+
+unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else /*Thumb1*/ {
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+ }
+ return 2;
+}
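
The ARM branch above returns cost 1 when the constant (or its complement) fits ARM's "modified immediate" encoding. A standalone sketch (hypothetical helper name) of the rule that ARM_AM::getSOImmVal() tests, namely an 8-bit value rotated right by an even amount:

#include <cstdint>

static bool isARMModifiedImm(uint32_t V) {
  if (V <= 0xFF)
    return true; // rotation of zero
  for (unsigned Rot = 2; Rot < 32; Rot += 2) {
    // Rotating left by Rot undoes a rotate-right by Rot in the encoding.
    uint32_t Rotated = (V << Rot) | (V >> (32 - Rot));
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}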
+
+unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
+ };
+
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
+ array_lengthof(NEONVectorConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorConversionTbl[Idx].Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
+ array_lengthof(NEONFloatConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONFloatConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
+ array_lengthof(NEONIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONIntegerConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ int Idx =
+ ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
+ array_lengthof(ARMIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ARMIntegerConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
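
All of the tables above are consumed the same way: CostTableLookup and ConvertCostTableLookup are simple linear scans that return an index on a hit and -1 on a miss, at which point the code falls back to the target-independent cost. A standalone sketch of that lookup shape (toy types, not LLVM's templates):

#include <cstddef>

struct ConvEntry { int ISD; int Dst; int Src; unsigned Cost; };

static int lookupConvCost(const ConvEntry *Tbl, size_t Len, int ISD, int Dst,
                          int Src) {
  for (size_t I = 0; I != Len; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
      return static_cast<int>(I); // caller reads Tbl[I].Cost
  return -1; // miss: defer to the default implementation
}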
+
+unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) const {
+ // Penalize inserting into a D-subregister. We end up with a three times
+ // lower estimated throughput on Swift.
+ if (ST->isSwift() &&
+ Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return 3;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+}
+
+unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ // On NEON, a vector select gets lowered to vbsl.
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ return LT.first;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only handle costs of reverse shuffles for now.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ // A reverse shuffle costs one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
+
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
+ };
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
+ ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
deleted file mode 100644
index fda8536fcf6b..000000000000
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARMBaseInfo.h"
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-
-#include "llvm/Support/TargetRegistry.h"
-
-#include "llvm/ADT/StringSwitch.h"
-
-#include <string>
-#include <map>
-
-using namespace llvm;
-
-namespace {
-
-class ARMBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
-protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
-public:
- ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
-};
-
-class ARMAsmLexer : public ARMBaseAsmLexer {
-public:
- ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-class ThumbAsmLexer : public ARMBaseAsmLexer {
-public:
- ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-} // end anonymous namespace
-
-AsmToken ARMBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default: break;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- break;
- case AsmToken::Identifier: {
- std::string lowerCase = lexedToken.getString().lower();
-
- unsigned regID = MatchRegisterName(lowerCase);
- // Check for register aliases.
- // r13 -> sp
- // r14 -> lr
- // r15 -> pc
- // ip -> r12
- // FIXME: Some assemblers support lots of others. Do we want them all?
- if (!regID) {
- regID = StringSwitch<unsigned>(lowerCase)
- .Case("r13", ARM::SP)
- .Case("r14", ARM::LR)
- .Case("r15", ARM::PC)
- .Case("ip", ARM::R12)
- .Default(0);
- }
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- }
- }
-
- return AsmToken(lexedToken);
-}
-
-extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
-}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c61e3bd99d77..ed7b7ec9d2cd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,31 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -178,7 +181,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
// Asm Match Converter Methods
void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
@@ -249,6 +253,13 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
+
+ // Set ELF header flags.
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
// Implementation of the MCTargetAsmParser interface:
@@ -258,6 +269,7 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
unsigned checkTargetMatchPredicate(MCInst &Inst);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -270,7 +282,7 @@ public:
namespace {
/// ARMOperand - Instances of this class represent a parsed ARM machine
-/// instruction.
+/// operand.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
k_CondCode,
@@ -304,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
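
A likely motivation for naming these structs: the union at the end is anonymous, and C++ forbids defining new types inside an anonymous union, so the definitions are hoisted out and the union merely declares members of the named types. A minimal standalone illustration:

struct RegOpSketch { unsigned RegNum; };
struct ImmOpSketch { long Val; };

struct OperandSketch {
  int Kind; // discriminator: says which union member is live
  union {             // anonymous unions may only declare data members,
    RegOpSketch Reg;  // not define types, hence the hoisted definitions
    ImmOpSketch Imm;  // above
  };
};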
@@ -2450,8 +2486,8 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
RegNo = tryParseRegister();
- EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2540,6 +2576,8 @@ int ARMAsmParser::tryParseShiftRegister(
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
int64_t Imm = 0;
int ShiftReg = 0;
if (ShiftTy == ARM_AM::rrx) {
@@ -2554,7 +2592,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
- if (getParser().ParseExpression(ShiftExpr)) {
+ if (getParser().parseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
}
@@ -2579,8 +2617,9 @@ int ARMAsmParser::tryParseShiftRegister(
if (Imm == 0)
ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
return -1;
@@ -2595,10 +2634,10 @@ int ARMAsmParser::tryParseShiftRegister(
if (ShiftReg && ShiftTy != ARM_AM::rrx)
Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
ShiftReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
else
Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
return 0;
}
@@ -2612,12 +2651,13 @@ int ARMAsmParser::tryParseShiftRegister(
/// parse for a specific register type.
bool ARMAsmParser::
tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
- Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
@@ -2635,16 +2675,16 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE)
return TokError("immediate value expected for vector index");
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
@@ -2780,7 +2820,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *Expr;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(Expr)) {
+ if (getParser().parseExpression(Expr)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -2794,7 +2834,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Check for and consume the closing '}'
if (Parser.getTok().isNot(AsmToken::RCurly))
return MatchOperand_ParseFail;
- SMLoc E = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the '}'
Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
@@ -2891,10 +2931,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
- return Error(EndLoc, "register expected");
+ return Error(AfterMinusLoc, "register expected");
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -2904,10 +2944,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!RC->contains(EndReg))
- return Error(EndLoc, "invalid register in register list");
+ return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
- return Error(EndLoc, "bad range in register list");
+ return Error(AfterMinusLoc, "bad range in register list");
// Add all the registers in the range to the register list.
while (Reg != EndReg) {
@@ -2955,9 +2995,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly))
- return Error(E, "'}' expected");
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
// Push the register list operand.
@@ -2974,13 +3014,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
if (Parser.getTok().is(AsmToken::RBrac)) {
// "Dn[]" is the 'all lanes' syntax.
LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
@@ -2992,7 +3033,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LaneIndex)) {
+ if (getParser().parseExpression(LaneIndex)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3005,6 +3046,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Error(Parser.getTok().getLoc(), "']' expected");
return MatchOperand_ParseFail;
}
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
int64_t Val = CE->getValue();
@@ -3031,21 +3073,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// (without enclosing curly braces) as a single or double entry list,
// respectively.
if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
return MatchOperand_NoMatch;
- SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
S, E));
break;
@@ -3059,18 +3099,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
@@ -3111,7 +3149,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
++Reg;
++Count;
}
- if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
@@ -3125,10 +3165,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1) {
- Error(EndLoc, "register expected");
+ Error(AfterMinusLoc, "register expected");
return MatchOperand_ParseFail;
}
// Allow Q regs and just interpret them as the two D sub-registers.
@@ -3140,24 +3180,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
- Error(EndLoc, "invalid register in register list");
+ Error(AfterMinusLoc, "invalid register in register list");
return MatchOperand_ParseFail;
}
// Ranges must go from low to high.
if (Reg > EndReg) {
- Error(EndLoc, "bad range in register list");
+ Error(AfterMinusLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(AfterMinusLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
- EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -3196,11 +3236,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(LaneLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
continue;
@@ -3221,7 +3262,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
Error(EndLoc, "mismatched lane index in register list");
@@ -3229,11 +3270,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(E, "'}' expected");
+ Error(Parser.getTok().getLoc(), "'}' expected");
return MatchOperand_ParseFail;
}
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
switch (LaneKind) {
@@ -3310,7 +3351,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
- if (getParser().ParseExpression(MemBarrierID)) {
+ if (getParser().parseExpression(MemBarrierID)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3525,7 +3566,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3540,7 +3582,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
return MatchOperand_Success;
}
@@ -3550,7 +3592,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
int Val = StringSwitch<int>(Tok.getString())
@@ -3560,12 +3602,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the token.
if (Val == -1) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
getContext()),
- S, Parser.getTok().getLoc()));
+ S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -3601,16 +3643,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed shift expression");
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "shift amount must be an immediate");
+ Error(ExLoc, "shift amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3618,25 +3661,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isASR) {
// Shift amount must be in [1,32]
if (Val < 1 || Val > 32) {
- Error(E, "'asr' shift amount must be in range [1,32]");
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
return MatchOperand_ParseFail;
}
// asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
if (isThumb() && Val == 32) {
- Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
return MatchOperand_ParseFail;
}
if (Val == 32) Val = 0;
} else {
// Shift amount must be in [0,31]
if (Val < 0 || Val > 31) {
- Error(E, "'lsr' shift amount must be in range [0,31]");
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
return MatchOperand_ParseFail;
}
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3662,16 +3704,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed rotate expression");
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "rotate amount must be an immediate");
+ Error(ExLoc, "rotate amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3680,12 +3723,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// normally, zero is represented in asm by omitting the rotate operand
// entirely.
if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
- Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3703,7 +3745,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *LSBExpr;
SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LSBExpr)) {
+ if (getParser().parseExpression(LSBExpr)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3735,7 +3777,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat hash token.
const MCExpr *WidthExpr;
- if (getParser().ParseExpression(WidthExpr)) {
+ SMLoc EndLoc;
+ if (getParser().parseExpression(WidthExpr, EndLoc)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3751,9 +3794,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(E, "'width' operand must be in the range [1,32-lsb]");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
return MatchOperand_Success;
}
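
A recurring theme in this file's churn is replacing "location of the next token" with true token end locations. The payoff is accurately ranged diagnostics; a sketch of how a start/end pair becomes an underlined source range (hypothetical call site, SourceMgr API as assumed for this revision):

#include "llvm/Support/SourceMgr.h"

// Hypothetical diagnostic helper; S and E come from the parser (operand
// start and token end location, as captured above).
void reportOperandError(llvm::SourceMgr &SM, llvm::SMLoc S, llvm::SMLoc E) {
  SM.PrintMessage(S, llvm::SourceMgr::DK_Error,
                  "malformed immediate expression", llvm::SMRange(S, E));
}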
@@ -3772,7 +3814,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3781,15 +3822,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
Error(Parser.getTok().getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
unsigned ShiftImm = 0;
@@ -3797,6 +3838,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the ','.
if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
}
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
@@ -3829,14 +3873,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ SMLoc E;
+ if (getParser().parseExpression(Offset, E))
return MatchOperand_ParseFail;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
if (!CE) {
Error(S, "constant expression expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Tok.getLoc();
// Negative zero is encoded as the flag value INT32_MIN.
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
@@ -3851,7 +3895,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3860,18 +3903,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
- Error(Parser.getTok().getLoc(), "register expected");
+ Error(Tok.getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
- 0, S, E));
+ 0, S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -4218,13 +4261,14 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (BaseRegNum == -1)
return Error(BaseRegTok.getLoc(), "register expected");
- // The next token must either be a comma or a closing bracket.
+ // The next token must either be a comma, a colon or a closing bracket.
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) &&
+ !Tok.is(AsmToken::RBrac))
return Error(Tok.getLoc(), "malformed memory operand");
if (Tok.is(AsmToken::RBrac)) {
- E = Tok.getLoc();
+ E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
@@ -4240,8 +4284,11 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return false;
}
- assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
- Parser.Lex(); // Eat the comma.
+ assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) &&
+ "Lost colon or comma in memory operand?!");
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ }
// If we have a ':', it's an alignment specifier.
if (Parser.getTok().is(AsmToken::Colon)) {
@@ -4249,7 +4296,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4272,9 +4319,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4305,7 +4352,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool isNegative = getParser().getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ if (getParser().parseExpression(Offset))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4321,9 +4368,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
CE = MCConstantExpr::Create(INT32_MIN, getContext());
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4367,9 +4414,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
@@ -4424,7 +4471,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Parser.Lex(); // Eat hash token.
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// Range check the immediate.
// lsl, ror: 0 <= imm <= 31
@@ -4453,7 +4500,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Anything that can accept a floating point constant as an operand
- // needs to go through here, as the regular ParseExpression is
+ // needs to go through here, as the regular parseExpression is
// integer only.
//
// This routine still creates a generic Immediate operand, containing
@@ -4546,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "b r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
// Fall through for the Identifier case that is not a register or a
@@ -4573,7 +4626,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
@@ -4592,7 +4645,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (Parser.getTok().isNot(AsmToken::Colon)) {
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
if (CE) {
@@ -4602,6 +4655,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
}
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibility
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
return false;
}
// w/ a ':' after the '#', it's just like a plain ':'.
@@ -4616,7 +4678,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return true;
const MCExpr *SubExprVal;
- if (getParser().ParseExpression(SubExprVal))
+ if (getParser().parseExpression(SubExprVal))
return true;
const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
@@ -4989,7 +5051,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "conditional execution not supported in Thumb1");
}
@@ -5003,14 +5065,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Mnemonic == "it") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
if (ITMask.size() > 3) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "too many conditions on IT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
char pos = ITMask[i - 1];
if (pos != 't' && pos != 'e') {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
@@ -5036,14 +5098,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
// If we had a predication code on an instruction that can't do that, issue an
// error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' is not predicable, but condition code specified");
}
@@ -5092,7 +5154,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -5101,7 +5163,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Parse and remember the operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
@@ -5109,7 +5171,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -5140,50 +5202,42 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
delete Op;
}
- // The vector-compare-to-zero instructions have a literal token "#0" at
- // the end that comes to here as an immediate operand. Convert it to a
- // token to play nicely with the matcher.
- if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
- Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // VCMP{E} does the same thing, but with a different operand count.
- if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[4])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 4);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here. Take care not to convert those
- // that should hit the Thumb2 encoding.
- if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0 &&
- (isThumbOne() ||
- // The cc_out operand matches the IT block.
- ((inITBlock() != CarrySetting) &&
- // Neither register operand is a high register.
- (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+ bool isLoad = (Mnemonic == "ldrexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
+ ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
+ MRC.contains(Op2->getReg())) {
+ unsigned Reg1 = Op1->getReg();
+ unsigned Reg2 = Op2->getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2->getStartLoc(), isLoad ?
+ "destination operands must be sequential" :
+ "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
+ Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
+ NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
+ delete Op1;
+ delete Op2;
}
}
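
For reference, the sequential-register constraint enforced above reduces to a small predicate on the encoded register numbers: ARM-mode LDREXD/STREXD require an even/odd pair such as r0/r1 or r2/r3. A standalone sketch (the helper name is illustrative only):

    // Rt must be even and Rt2 must be Rt + 1, where Rt/Rt2 are the hardware
    // encoding values, e.g. (0,1) is a valid pair but (1,2) is not.
    static bool isValidExclusivePair(unsigned Rt, unsigned Rt2) {
      return (Rt & 1) == 0 && Rt2 == Rt + 1;
    }
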
@@ -5274,8 +5328,7 @@ validateInstruction(MCInst &Inst,
switch (Inst.getOpcode()) {
case ARM::LDRD:
case ARM::LDRD_PRE:
- case ARM::LDRD_POST:
- case ARM::LDREXD: {
+ case ARM::LDRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
@@ -5294,8 +5347,7 @@ validateInstruction(MCInst &Inst,
return false;
}
case ARM::STRD_PRE:
- case ARM::STRD_POST:
- case ARM::STREXD: {
+ case ARM::STRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -5686,7 +5738,12 @@ processInstruction(MCInst &Inst,
}
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
- Inst.setOpcode(ARM::t2LDRpci);
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff)
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
return true;
case ARM::t2LDRBpcrel:
Inst.setOpcode(ARM::t2LDRBpci);
@@ -7483,6 +7540,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
+
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
@@ -7595,10 +7653,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -7742,13 +7800,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(SRegLoc, "register name expected");
}
// Shouldn't be anything else.
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Parser.getTok().getLoc(),
"unexpected input in .req directive.");
}
@@ -7766,7 +7824,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(L, "unexpected input in .unreq directive.");
}
RegisterReqs.erase(Parser.getTok().getIdentifier());
@@ -7786,16 +7844,31 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return true;
}
-extern "C" void LLVMInitializeARMAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
- LLVMInitializeARMAsmLexer();
}
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ if (Kind == MCK__35_0 && Op->isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == 0)
+ return Match_Success;
+ }
+ return Match_InvalidOperand;
+}
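
The enum value MCK__35_0 tested above is the matcher's mangled name for the literal token "#0": the generated matcher replaces each non-alphanumeric character with its ASCII code wrapped in underscores, and '#' is ASCII 35. A sketch of that mangling scheme, offered as an assumption about the generator's naming rather than a quote of its implementation:

    #include <cctype>
    #include <string>

    // "#0" -> "MCK_" + "_35_" + "0" == "MCK__35_0".
    static std::string mangleMatchClass(const std::string &Token) {
      std::string Name = "MCK_";
      for (char C : Token) {
        if (isalnum(static_cast<unsigned char>(C)))
          Name += C;
        else
          Name += "_" + std::to_string(static_cast<int>(C)) + "_";
      }
      return Name;
    }
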
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index e24a1b17867a..d2012c387cda 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmParser
- ARMAsmLexer.cpp
ARMAsmParser.cpp
)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 377bd9243c2e..b832508a086c 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -11,11 +11,11 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
@@ -38,6 +38,7 @@ add_llvm_target(ARMCodeGen
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
+ ARMTargetTransformInfo.cpp
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f00142de50dc..2e009e55e3b0 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -9,21 +9,20 @@
#define DEBUG_TYPE "arm-disassembler"
+#include "llvm/MC/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCExpr.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -105,10 +104,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-private:
};
/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
@@ -131,8 +126,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
return new ARMDisassembler(STI);
@@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-const EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
-const EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
const MemoryObject &Region,
uint64_t Address,
@@ -1281,7 +1265,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
unsigned lsb = fieldFromInstruction(Val, 0, 5);
DecodeStatus S = MCDisassembler::Success;
- if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+ if (lsb > msb) {
+ Check(S, MCDisassembler::SoftFail);
+ // The check above emits the "potentially undefined instruction encoding"
+ // warning, but we must not build an MCOperand with lsb > msb, or else
+ // printing the MCInst will crash.
+ lsb = msb;
+ }
uint32_t msb_mask = 0xFFFFFFFF;
if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
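
The clamp above exists so the mask computation that follows stays well-defined; the decoder ultimately selects bits [lsb, msb] of a 32-bit word. A standalone sketch of the full mask (hypothetical helper name):

    #include <cstdint>

    // Mask with bits [lsb, msb] set; assumes lsb <= msb <= 31, which the
    // clamp above guarantees.
    static uint32_t bitfieldMask(unsigned lsb, unsigned msb) {
      uint32_t MsbMask = (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
      uint32_t LsbMask = (1u << lsb) - 1; // bits strictly below lsb
      return MsbMask & ~LsbMask;
    }
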
@@ -3059,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
return MCDisassembler::Success;
}
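
Dropping SignExtend32<7> here reflects that Thumb CBZ/CBNZ encode an unsigned, forward-only offset: the i:imm5 field is zero-extended, doubled, and added to the PC value (instruction address + 4). A sketch of the target computation under that reading:

    #include <cstdint>

    // Forward-only: Val is the raw 6-bit i:imm5 field, never sign-extended.
    static uint64_t cbzTarget(uint64_t InstAddr, uint32_t Val) {
      return InstAddr + 4 + (static_cast<uint64_t>(Val) << 1);
    }
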
@@ -3288,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (load) {
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index dcc41d93f5ce..2afb20d6686a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -13,11 +13,11 @@
#define DEBUG_TYPE "asm-printer"
#include "ARMInstPrinter.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -252,6 +252,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
+ // Combine 2 GPRs from the disassembler into a GPRPair to match the instr def.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when decoding them, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+ &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+ // Copy the remaining operands into NewMI.
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, O);
+ return;
+ }
+ }
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -264,7 +293,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Reg);
} else if (Op.isImm()) {
O << markup("<imm:")
- << '#' << Op.getImm()
+ << '#' << formatImm(Op.getImm())
<< markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
@@ -290,7 +319,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
O << *MO1.getExpr();
else if (MO1.isImm()) {
O << markup("<mem:") << "[pc, "
- << markup("<imm:") << "#" << MO1.getImm()
+ << markup("<imm:") << "#" << formatImm(MO1.getImm())
<< markup(">]>", "]");
}
else
@@ -598,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- // FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << (MO2.getImm() << 3);
+ O << ":" << (MO2.getImm() << 3);
}
O << "]" << markup(">");
}
@@ -691,6 +719,15 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "}";
}
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
+
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -873,7 +910,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << markup("<imm:")
- << "#" << MI->getOperand(OpNum).getImm() * 4
+ << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
<< markup(">");
}
@@ -881,7 +918,7 @@ void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
O << markup("<imm:")
- << "#" << (Imm == 0 ? 32 : Imm)
+ << "#" << formatImm((Imm == 0 ? 32 : Imm))
<< markup(">");
}
@@ -938,7 +975,7 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
if (unsigned ImmOffs = MO2.getImm()) {
O << ", "
<< markup("<imm:")
- << "#" << ImmOffs * Scale
+ << "#" << formatImm(ImmOffs * Scale)
<< markup(">");
}
O << "]" << markup(">");
@@ -1089,7 +1126,7 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
if (MO2.getImm()) {
O << ", "
<< markup("<imm:")
- << "#" << MO2.getImm() * 4
+ << "#" << formatImm(MO2.getImm() * 4)
<< markup(">");
}
O << "]" << markup(">");
@@ -1179,7 +1216,7 @@ void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
O << markup("<imm:")
- << "#" << Imm + 1
+ << "#" << formatImm(Imm + 1)
<< markup(">");
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index b7bab5fdcd8e..edff75d886e9 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -124,6 +124,7 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 000000000000..68afea12ed44
--- /dev/null
+++ b/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1ba6ab039f20..e66e98567873 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -8,9 +8,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -21,7 +23,6 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
@@ -114,11 +115,15 @@ public:
MCValue &Target, uint64_t &Value,
bool &IsResolved);
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -161,7 +166,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
@@ -216,7 +221,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
@@ -552,65 +557,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
-namespace {
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t _OSABI)
- : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMELFObjectWriter(OS, OSABI);
- }
-};
-
-// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
- unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup, Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T, TT), Subtype(st) {
- HasDataInCodeSupport = true;
- }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype);
- }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
-};
-
/// getFixupKindNumBytes - The number of bytes the fixup may change.
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
@@ -659,8 +605,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
@@ -668,37 +614,70 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
for (unsigned i = 0; i != NumBytes; ++i)
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMELFObjectWriter(OS, OSABI);
+ }
+};
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
} // end anonymous namespace
MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- if (TheTriple.getArchName() == "armv4t" ||
- TheTriple.getArchName() == "thumbv4t")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
- else if (TheTriple.getArchName() == "armv5e" ||
- TheTriple.getArchName() == "thumbv5e")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
- else if (TheTriple.getArchName() == "armv6" ||
- TheTriple.getArchName() == "thumbv6")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
- else if (TheTriple.getArchName() == "armv7f" ||
- TheTriple.getArchName() == "thumbv7f")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
- else if (TheTriple.getArchName() == "armv7k" ||
- TheTriple.getArchName() == "thumbv7k")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
- else if (TheTriple.getArchName() == "armv7s" ||
- TheTriple.getArchName() == "thumbv7s")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
+ object::mach::CPUSubtypeARM CS =
+ StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
+ .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
+ .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
+ .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
+ .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
+ .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
+ .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
+ .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
+ .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
+ .Default(object::mach::CSARM_V7);
+
+ return new DarwinARMAsmBackend(T, TT, CS);
}
if (TheTriple.isOSWindows())
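
With applyFixup hoisted from the ELF and Darwin subclasses into the shared ARMAsmBackend, both object formats now use one byte-at-a-time, little-endian OR of the already bit-split fixup value into the fragment. A minimal sketch of that loop, with an illustrative function name:

    #include <cstdint>

    // Value has already been rearranged into instruction bitfields by
    // adjustFixupValue; here it is only OR'd in, little-endian.
    static void orInFixup(uint8_t *Data, unsigned Offset, unsigned NumBytes,
                          uint64_t Value) {
      for (unsigned i = 0; i != NumBytes; ++i)
        Data[Offset + i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
    }
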
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 99e4f713f690..f98bbd204c7a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -37,7 +37,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ARMELFObjectWriter::~ARMELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Triple.
-unsigned ARMELFObjectWriter::getEFlags() const {
- return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
-}
-
// In ARM, _MergedGlobals and other most symbols get emitted directly.
// I.e. not as an offset to a section symbol.
// This code is an approximation of what ARM/gcc does.
@@ -133,6 +127,7 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
switch (RelocType) {
default: EmitThisSym = true; break;
case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ case ELF::R_ARM_PREL31: EmitThisSym = false; break;
}
}
@@ -225,6 +220,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
case MCSymbolRefExpr::VK_ARM_GOT:
Type = ELF::R_ARM_GOT_BREL;
break;
@@ -249,7 +247,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TARGET2:
Type = ELF::R_ARM_TARGET2;
break;
- }
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ }
break;
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 000000000000..418971df3292
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,418 @@
+//===- ARMELFStreamer.cpp - ELF Object Output for ARM --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. It differs
+// from the generic ELF streamer in that it emits mapping symbols ($a, $t and
+// $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOp.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
+ FnStart(0), Personality(0), CantUnwind(false) {}
+
+ ~ARMELFStreamer() {}
+
+ // ARM exception handling directives
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Per);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, the default-constructed
+ // value that DenseMap::lookup returns for unseen sections.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_ARMELFStreamer;
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ }
+
+ // Helper functions for ARM exception handling directives
+ void Reset();
+
+ void EmitPersonalityFixup(StringRef Name);
+
+ void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ SectionKind Kind, const MCSymbol &Fn);
+ void SwitchToExTabSection(const MCSymbol &FnStart);
+ void SwitchToExIdxSection(const MCSymbol &FnStart);
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ // ARM Exception Handling Frame Information
+ MCSymbol *ExTab;
+ MCSymbol *FnStart;
+ const MCSymbol *Personality;
+ bool CantUnwind;
+};
+}
+
+inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+ unsigned Type,
+ unsigned Flags,
+ SectionKind Kind,
+ const MCSymbol &Fn) {
+ const MCSectionELF &FnSection =
+ static_cast<const MCSectionELF &>(Fn.getSection());
+
+ // Create the name for new section
+ StringRef FnSecName(FnSection.getSectionName());
+ SmallString<128> EHSecName(Prefix);
+ if (FnSecName != ".text") {
+ EHSecName += FnSecName;
+ }
+
+ // Get .ARM.extab or .ARM.exidx section
+ const MCSectionELF *EHSection = NULL;
+ if (const MCSymbol *Group = FnSection.getGroup()) {
+ EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
+ FnSection.getEntrySize(), Group->getName());
+ } else {
+ EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
+ }
+ assert(EHSection);
+
+ // Switch to .ARM.extab or .ARM.exidx section
+ SwitchSection(EHSection);
+ EmitCodeAlignment(4, 0);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx",
+ ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = NULL;
+ FnStart = NULL;
+ Personality = NULL;
+ CantUnwind = false;
+}
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(PersonalitySym,
+ MCSymbolRefExpr::VK_ARM_NONE,
+ getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::EmitFnStart() {
+ assert(FnStart == 0);
+ FnStart = getContext().CreateTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::EmitFnEnd() {
+ assert(FnStart && ".fnstart must precede .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive
+ int PersonalityIndex = -1;
+ if (!ExTab && !CantUnwind) {
+ // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ PersonalityIndex = 1;
+
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+ }
+
+ // Emit the exception index table entry
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex == 1)
+ EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4, 0);
+
+ if (CantUnwind) {
+ EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ } else {
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4, 0);
+ }
+
+ // Clear the exception handling frame information
+ Reset();
+}
+
+void ARMELFStreamer::EmitCantUnwind() {
+ CantUnwind = true;
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ assert(!ExTab);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit Personality
+ assert(Personality && ".personality directive must precede .handlerdata");
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4, 0);
+
+ // Emit unwind opcodes
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= UNWIND_OPCODE_FINISH << 16;
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+}
+
+void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+ Personality = Per;
+}
+
+void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitPad(int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // TODO: Not implemented
+}
+
+namespace llvm {
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+
+}
+
+
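
EmitFnEnd and EmitHandlerData above both pack a single 32-bit compact-model word: an extra-word count, the EHT_COMPACT|personality selector, and unwind opcode bytes (currently just FINISH placeholders). A sketch of the packing as this patch performs it; field positions are taken from the code here, and the ARM EHABI document remains the normative reference:

    #include <cstdint>

    // Mirrors the Entry |= ... sequence in EmitFnEnd/EmitHandlerData.
    static uint32_t packCompactEntry(uint8_t NumExtraWords, uint8_t Selector,
                                     uint8_t Op0, uint8_t Op1) {
      uint32_t Entry = 0;
      Entry |= static_cast<uint32_t>(NumExtraWords) << 24;
      Entry |= static_cast<uint32_t>(Selector) << 16; // EHT_COMPACT | index
      Entry |= static_cast<uint32_t>(Op0) << 8;
      Entry |= Op1;
      return Entry;
    }
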
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
new file mode 100644
index 000000000000..77ae5d23628e
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_STREAMER_H
+#define ARM_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb);
+}
+
+#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index d0e127a8f335..7a59a7dd5055 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -24,8 +26,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -655,15 +655,28 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
+ int SoImmVal;
if (offset == INT32_MIN) {
Val = 0x1000;
- offset = 0;
+ SoImmVal = 0;
} else if (offset < 0) {
Val = 0x1000;
offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if (SoImmVal == -1) {
+ Val = 0x2000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ } else {
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if (SoImmVal == -1) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
}
- int SoImmVal = ARM_AM::getSOImmVal(offset);
assert(SoImmVal != -1 && "Not a valid so_imm value!");
Val |= SoImmVal;
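
The retry logic above flips between the add (0x2000) and sub (0x1000) encodings because only some 32-bit values are representable as an ARM modified immediate. A standalone sketch of the test ARM_AM::getSOImmVal performs (isARMSOImm is an illustrative name; the real helper returns the rotate/value encoding, or -1 on failure):

    #include <cstdint>

    // An ARM so_imm is an 8-bit value rotated right by an even amount.
    bool isARMSOImm(uint32_t Imm) {
      for (unsigned Rot = 0; Rot != 32; Rot += 2) {
        // Rotate left by Rot; if the result fits in 8 bits, Imm is encodable.
        uint32_t Rotated = (Imm << Rot) | (Imm >> ((32 - Rot) & 31));
        if (Rotated <= 0xff)
          return true;
      }
      return false;
    }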
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 22e14a2281de..fc8505b052bd 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -9,8 +9,8 @@
#define DEBUG_TYPE "armmcexpr"
#include "ARMMCExpr.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
const ARMMCExpr*
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index b404e6c6e014..cd4067a52955 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -64,6 +64,9 @@ public:
return getSubExpr()->FindAssociatedSection();
}
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 00ffc94ac7d1..f09fb5a94fd8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,10 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMMCTargetDesc.h"
-#include "ARMMCAsmInfo.h"
#include "ARMBaseInfo.h"
+#include "ARMELFStreamer.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -36,6 +38,8 @@
using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
+ Triple triple(TT);
+
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.size();
@@ -118,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
ARMArchFeature += ",+thumb-mode";
}
+ if (triple.isOSNaCl()) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-trap";
+ else
+ ARMArchFeature += ",+nacl-trap";
+ }
+
return ARMArchFeature;
}
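
The empty-check-then-append pattern now occurs for both "+thumb-mode" and "+nacl-trap"; a trivial hypothetical helper (not part of the patch) captures it:

    #include <string>

    // Hypothetical: append a subtarget feature to a comma-separated list.
    static void appendFeature(std::string &Features, const char *Feature) {
      if (!Features.empty())
        Features += ",";
      Features += Feature;
    }
    // e.g. appendFeature(ARMArchFeature, "+nacl-trap");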
@@ -144,7 +155,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X, ARM::LR);
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
}
@@ -186,7 +197,8 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
llvm_unreachable("ARM does not support Windows COFF format");
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 2154c931769a..b9efe74b41e5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,17 +7,18 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
new file mode 100644
index 000000000000..dad5576df4cd
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -0,0 +1,112 @@
+//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the constants for the ARM unwind opcodes and exception
+// handling table entry kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_H
+#define ARM_UNWIND_OP_H
+
+namespace llvm {
+
+ /// ARM exception handling table entry kinds
+ enum ARMEHTEntryKind {
+ EHT_GENERIC = 0x00,
+ EHT_COMPACT = 0x80
+ };
+
+ enum {
+    /// Special entry for a function that never unwinds
+ EXIDX_CANTUNWIND = 0x1
+ };
+
+ /// ARM-defined frame unwinding opcodes
+ enum ARMUnwindOpcodes {
+ // Format: 00xxxxxx
+ // Purpose: vsp = vsp + ((x << 2) + 4)
+ UNWIND_OPCODE_INC_VSP = 0x00,
+
+ // Format: 01xxxxxx
+ // Purpose: vsp = vsp - ((x << 2) + 4)
+ UNWIND_OPCODE_DEC_VSP = 0x40,
+
+ // Format: 10000000 00000000
+ // Purpose: refuse to unwind
+ UNWIND_OPCODE_REFUSE = 0x8000,
+
+ // Format: 1000xxxx xxxxxxxx
+ // Purpose: pop r[15:12], r[11:4]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000,
+
+ // Format: 1001xxxx
+ // Purpose: vsp = r[x]
+ // Constraint: x != 13 && x != 15
+ UNWIND_OPCODE_SET_VSP = 0x90,
+
+ // Format: 10100xxx
+ // Purpose: pop r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0,
+
+ // Format: 10101xxx
+ // Purpose: pop r14, r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8,
+
+ // Format: 10110000
+ // Purpose: finish
+ UNWIND_OPCODE_FINISH = 0xb0,
+
+ // Format: 10110001 0000xxxx
+ // Purpose: pop r[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK = 0xb100,
+
+ // Format: 10110010 x(uleb128)
+ // Purpose: vsp = vsp + ((x << 2) + 0x204)
+ UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2,
+
+ // Format: 10110011 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300,
+
+ // Format: 10111xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8,
+
+ // Format: 11000xxx
+ // Purpose: pop wR[(10+x):10]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0,
+
+ // Format: 11000110 xxxxyyyy
+ // Purpose: pop wR[(x+y):x]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600,
+
+ // Format: 11000111 0000xxxx
+ // Purpose: pop wCGR[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700,
+
+ // Format: 11001000 xxxxyyyy
+ // Purpose: pop d[(16+x+y):(16+x)]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800,
+
+ // Format: 11001001 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900,
+
+ // Format: 11010xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
+ };
+
+}
+
+#endif // ARM_UNWIND_OP_H
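
As a worked example of the arithmetic these opcodes encode, here is a sketch (encodeIncVSP is a hypothetical helper) that emits "vsp = vsp + Adjust" using UNWIND_OPCODE_INC_VSP for small adjustments and UNWIND_OPCODE_INC_VSP_ULEB128 from 0x204 upward; adjustments strictly between 0x100 and 0x204 would need two small opcodes and are excluded for brevity:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodeIncVSP(uint32_t Adjust) {
      assert((Adjust & 3) == 0 && Adjust >= 4 && "multiple of 4 expected");
      assert((Adjust <= 0x100 || Adjust >= 0x204) && "mid range needs two ops");
      std::vector<uint8_t> Ops;
      if (Adjust <= 0x100) {              // vsp += (x << 2) + 4, x in [0,63]
        Ops.push_back(0x00 | ((Adjust - 4) >> 2)); // UNWIND_OPCODE_INC_VSP
      } else {
        Ops.push_back(0xb2);              // UNWIND_OPCODE_INC_VSP_ULEB128
        uint32_t X = (Adjust - 0x204) >> 2;        // vsp += (x << 2) + 0x204
        do {
          uint8_t B = X & 0x7f;
          X >>= 7;
          Ops.push_back(X ? (B | 0x80) : B);       // ULEB128 payload
        } while (X);
      }
      return Ops;
    }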
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 256599412e8b..e17eb4d5e987 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
+ ARMELFStreamer.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 70643bcda3ac..2e266c2e9624 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -16,16 +16,16 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 3e48ed1189cc..f069535ff3c0 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 463c440852f5..a64707e6f34f 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -173,7 +173,6 @@ GCC is doing a couple of clever things here:
mov r1, #1
lsl r1, r1, #8
tst r2, r1
-
//===---------------------------------------------------------------------===//
@@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack
objects are referenced off the frame pointer with negative offsets. See
oggenc for an example.
-
//===---------------------------------------------------------------------===//
Poor codegen test/CodeGen/ARM/select.ll f7:
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 500e3de82db3..fa5681fb12bf 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index edd73c20c0be..2c3388cc452c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB,
MRI, MIFlags);
}
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
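The rounding step uses the standard round-up-to-multiple identity; a quick standalone check (roundUp is an illustrative name; here Align is the target stack alignment):

    #include <cassert>

    // (Amount + Align - 1) / Align * Align rounds up to a multiple of Align.
    unsigned roundUp(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUp(1, 8) == 8);
      assert(roundUp(8, 8) == 8);   // already aligned: unchanged
      assert(roundUp(13, 8) == 16);
      return 0;
    }
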
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -124,14 +159,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
// Adjust FP so it points to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
+ if (HasFP) {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
.setMIFlags(MachineInstr::FrameSetup));
@@ -146,7 +184,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
MachineInstr::FrameSetup);
- if (STI.isTargetELF() && hasFP(MF))
+ if (STI.isTargetELF() && HasFP)
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
@@ -281,7 +319,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
AddDefaultPred(MIB);
- MIB->copyImplicitOps(&*MBBI);
+ MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -352,7 +390,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index bcfc5165fad0..5a300afd5d36 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,10 @@ public:
const TargetRegisterInfo *TRI) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 735b255759b7..095736d52a88 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a39b722caef5..7452fb776ebd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -18,21 +18,21 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
extern cl::opt<bool> ReuseFrameIndexVals;
@@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
- MRI);
-}
-
-void Thumb1RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
- } else {
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, *this, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
/// emitThumbConstant - Emit a series of instructions to materialize a
/// constant.
static void emitThumbConstant(MachineBasicBlock &MBB,
@@ -390,6 +349,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -417,7 +377,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset
MI.RemoveOperand(FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
return true;
}
@@ -428,7 +387,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
@@ -457,7 +415,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -595,22 +552,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
void
Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
unsigned VReg = 0;
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
@@ -635,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
@@ -647,15 +600,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
assert(AFI->isThumbFunction() &&
"This eliminateFrameIndex only supports Thumb1!");
- if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
// If we get here, the immediate doesn't fit into the instruction. We folded
@@ -688,11 +641,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
@@ -709,18 +663,17 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
- MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else {
llvm_unreachable("Unexpected opcode!");
}
// Add predicate back if it's needed.
- if (MI.isPredicable()) {
- MachineInstrBuilder MIB(&MI);
+ if (MI.isPredicable())
AddDefaultPred(MIB);
- }
}
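
With the new FIOperandNum parameter, the per-target scan for the frame-index operand (deleted above) presumably moves to the generic frame-index rewriting code, which locates the operand once and passes the index down. A hypothetical helper mirroring that scan:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Support/ErrorHandling.h"

    // Hypothetical: find the first frame-index operand, so targets'
    // eliminateFrameIndex implementations can take the index directly.
    static unsigned findFrameIndexOperand(const llvm::MachineInstr &MI) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i)
        if (MI.getOperand(i).isFI())
          return i;
      llvm_unreachable("Instr doesn't have a FrameIndex operand!");
    }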
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index f2e4b08f798e..ebbab36dd7b8 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -43,11 +43,6 @@ public:
unsigned PredReg = 0,
unsigned MIFlags = MachineInstr::NoFlags) const;
- /// Code Generation virtual methods...
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
// rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
// however much remains to be handled. Return 'true' if no further
// work is required.
@@ -62,7 +57,8 @@ public:
const TargetRegisterClass *RC,
unsigned Reg) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d54aa935325c..97c254ce75a5 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -11,12 +11,12 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumITs, "Number of IT blocks inserted");
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e9e20ddd8783..67e8ec7c5ff2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *MBB = Tail->getParent();
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
if (!AFI->hasITBlocks()) {
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
return;
}
@@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
--MBBI;
// Actually replace the tail.
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
// Fix up IT.
if (CC != ARMCC::AL) {
@@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Remove offset and remaining explicit predicate operands.
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
AddDefaultPred(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 29a87d016227..1a7a4d450cfe 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -16,12 +16,12 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index f18f491f4995..d50f5d972232 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,19 +9,21 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
@@ -52,81 +54,79 @@ namespace {
unsigned PredCC2 : 2;
unsigned PartFlag : 1; // 16-bit instruction does partial flag update
unsigned Special : 1; // Needs to be dealt with specially
+    unsigned AvoidMovs : 1; // Avoid movs with shifter operand (for Swift)
};
static const ReduceEntry ReduceTable[] = {
- // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
- { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
- { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
- { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
- { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
- //FIXME: Disable CMN, as CCodes are backwards from compare expectations
- //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
- { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: adr.n immediate offset must be multiple of 4.
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
- // likely to cause issue in the loop. As a size / performance workaround,
- // they are not marked as such.
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
- // FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
- { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
- { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
- { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
-
- // FIXME: Clean this up after splitting each Thumb load / store opcode
- // into multiple ones.
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
-
- { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
class Thumb2SizeReduce : public MachineFunctionPass {
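
For reference, one ReduceTable row spelled out against the field list (WideOpc, NarrowOpc1/NarrowOpc2, Imm1Limit, PartFlag, Special and AvoidMovs appear verbatim elsewhere in this pass; LowRegs1/LowRegs2, Imm2Limit and PredCC1 are assumed names for the remaining columns):

    // Columns: Wide, Narrow1, Narrow2, imm1, imm2, lo1, lo2, P/C, PF, S, AM
    struct Row {
      unsigned WideOpc, NarrowOpc1, NarrowOpc2;
      unsigned Imm1Limit, Imm2Limit;     // immediate bit widths
      bool LowRegs1, LowRegs2;           // narrow forms need low registers
      unsigned PredCC1, PredCC2;         // predication / CC constraints
      bool PartFlag, Special, AvoidMovs; // AvoidMovs is new in this patch
    };
    // t2ADDri: tADDi3 takes a 3-bit imm, tADDi8 an 8-bit imm, both low-reg
    // only; Special=1 routes it through ReduceSpecial for the SP case:
    //   { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }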
@@ -147,8 +147,7 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool IsSelfLoop);
+ bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -158,30 +157,52 @@ namespace {
const ReduceEntry &Entry);
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef, bool IsSelfLoop);
+ const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop);
+
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
+
+ bool OptimizeSize;
+ bool MinimizeSize;
+
+ // Last instruction to define CPSR in the current block.
+ MachineInstr *CPSRDef;
+ // Was CPSR last defined by a high latency instruction?
+ // When CPSRDef is null, this refers to CPSR defs in predecessors.
+ bool HighLatencyCPSR;
+
+ struct MBBInfo {
+ // The flags leaving this block have high latency.
+ bool HighLatencyCPSR;
+ // Has this block been visited yet?
+ bool Visited;
+
+ MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ };
+
+ SmallVector<MBBInfo, 8> BlockInfo;
};
char Thumb2SizeReduce::ID = 0;
}
Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -196,6 +217,16 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
return false;
}
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+  switch (Def->getOpcode()) {
+ case ARM::FMSTAT:
+ case ARM::tMUL:
+ return true;
+ }
+ return false;
+}
+
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
@@ -214,20 +245,19 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been OK to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool FirstInSelfLoop) {
- // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
- if (!STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
return false;
- if (!Def)
+ if (!CPSRDef)
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
- return FirstInSelfLoop;
+ return HighLatencyCPSR || FirstInSelfLoop;
SmallSet<unsigned, 2> Defs;
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = Def->getOperand(i);
+ for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = CPSRDef->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -245,6 +275,16 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
return false;
}
+ // If the current CPSR has high latency, try to avoid the false dependency.
+ if (HighLatencyCPSR)
+ return true;
+
+ // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+ // of them, so always shrink them when CPSR doesn't have high latency.
+ if (Use->getOpcode() == ARM::t2MOVi ||
+ Use->getOpcode() == ARM::t2MOVi16)
+ return false;
+
// No read-after-write dependency. The narrowing will add false dependency.
return true;
}
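
Distilled, the rewritten heuristic (omitting the -Oz / avoidCPSRPartialUpdate early-out) decides roughly as in this sketch; wouldAddFalseDep and its parameters are illustrative stand-ins for the member and loop state used above:

    // true => narrowing Use to a flag-setting 16-bit form would add a
    // harmful false CPSR dependency and should be avoided.
    bool wouldAddFalseDep(bool HaveCPSRDef, bool HighLatencyCPSR,
                          bool FirstInSelfLoop, bool UseReadsDefsOutput,
                          bool UseIsCheapMov) {
      if (!HaveCPSRDef)                // state unknown at block entry
        return HighLatencyCPSR || FirstInSelfLoop;
      if (UseReadsDefsOutput)
        return false;                  // a real RAW dependency exists anyway
      if (HighLatencyCPSR)
        return true;                   // don't chain onto a slow flag def
      if (UseIsCheapMov)
        return false;                  // t2MOVi/t2MOVi16: shrink regardless
      return true;
    }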
@@ -487,16 +527,15 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -546,12 +585,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
break;
@@ -563,13 +602,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -578,10 +617,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// are prioritized, but the table assumes a unique entry for each
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
- { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
return false;
@@ -590,12 +629,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
@@ -666,7 +710,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -703,11 +747,16 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Limit = ~0U;
if (Entry.Imm1Limit)
Limit = (1 << Entry.Imm1Limit) - 1;
@@ -757,7 +806,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -841,14 +890,57 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
return LiveCPSR;
}
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
- MachineInstr *CPSRDef = 0;
MachineInstr *BundleMI = 0;
+ CPSRDef = 0;
+ HighLatencyCPSR = false;
+
+ // Check predecessors for the latest CPSRDef.
+ bool HasBackEdges = false;
+ for (MachineBasicBlock::pred_iterator
+ I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+ const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ if (!PInfo.Visited) {
+ // Since blocks are visited in RPO, this must be a back-edge.
+ HasBackEdges = true;
+ continue;
+ }
+ if (PInfo.HighLatencyCPSR) {
+ HighLatencyCPSR = true;
+ break;
+ }
+ }
+
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
@@ -862,43 +954,25 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
BundleMI = MI;
continue;
}
+ if (MI->isDebugValue())
+ continue;
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
- if (OPI != ReduceOpcodeMap.end()) {
- const ReduceEntry &Entry = ReduceTable[OPI->second];
- // Ignore "special" cases for now.
- if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit two-address instruction.
- if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
}
- ProcessNext:
- if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ if (!NextInSameBundle && MI->isInsideBundle()) {
// FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
// marker is only on the BUNDLE instruction. Process the BUNDLE
// instruction as we finish with the bundled instruction to work around
@@ -915,14 +989,19 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
+ HighLatencyCPSR = false;
IsSelfLoop = false;
} else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
IsSelfLoop = false;
}
}
+ MBBInfo &Info = BlockInfo[MBB.getNumber()];
+ Info.HighLatencyCPSR = HighLatencyCPSR;
+ Info.Visited = true;
return Modified;
}
@@ -931,9 +1010,23 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
+ // Optimizing / minimizing size?
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
+ BlockInfo.clear();
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Visit blocks in reverse post-order so LastCPSRDef is known for all
+ // predecessors.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Modified |= ReduceMBB(*I);
+ for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ Modified |= ReduceMBB(**I);
return Modified;
}
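
The reverse post-order walk is what makes the BlockInfo seeding in ReduceMBB sound: every forward predecessor is processed first, so an unvisited predecessor must be a back-edge. A toy standalone model of the same pattern (plain containers, not LLVM types):

    #include <cstdio>
    #include <vector>

    struct Block {
      std::vector<int> Preds;
      bool Flag = false;    // stands in for HighLatencyCPSR
      bool Visited = false;
    };

    void propagate(std::vector<Block> &Blocks, const std::vector<int> &RPO) {
      for (int B : RPO) {
        for (int P : Blocks[B].Preds) {
          if (!Blocks[P].Visited)
            continue;                   // back-edge: handled conservatively
          if (Blocks[P].Flag)
            Blocks[B].Flag = true;      // inherit state from forward preds
        }
        Blocks[B].Visited = true;       // ...per-block processing goes here
      }
    }

    int main() {
      // 0 -> 1 -> 2, plus a back-edge 2 -> 1.
      std::vector<Block> Blocks(3);
      Blocks[1].Preds = {0, 2};
      Blocks[2].Preds = {1};
      Blocks[0].Flag = true;            // block 0 ends with a slow flag def
      propagate(Blocks, {0, 1, 2});
      std::printf("block 2 flag: %d\n", Blocks[2].Flag); // prints 1
    }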
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index a85acaaa1494..02ac493b4215 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,16 +1,13 @@
add_llvm_library(LLVMTarget
Mangler.cpp
Target.cpp
- TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
- TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
- TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
deleted file mode 100644
index 1f8ca8681c09..000000000000
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS SPU.td)
-
-tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv)
-add_public_tablegen_target(CellSPUCommonTableGen)
-
-add_llvm_target(CellSPUCodeGen
- SPUAsmPrinter.cpp
- SPUHazardRecognizers.cpp
- SPUInstrInfo.cpp
- SPUISelDAGToDAG.cpp
- SPUISelLowering.cpp
- SPUFrameLowering.cpp
- SPUMachineFunction.cpp
- SPURegisterInfo.cpp
- SPUSubtarget.cpp
- SPUTargetMachine.cpp
- SPUSelectionDAGInfo.cpp
- SPUNopFiller.cpp
- )
-
-add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
deleted file mode 100644
index cdb4099ffbca..000000000000
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-///--==-- Arithmetic ops intrinsics --==--
-def CellSDKah:
- RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
-def CellSDKahi:
- RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
-def CellSDKa:
- RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
-def CellSDKai:
- RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
-def CellSDKsfh:
- RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
-def CellSDKsfhi:
- RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
-def CellSDKsf:
- RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
-def CellSDKsfi:
- RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
-def CellSDKaddx:
- RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
-def CellSDKcg:
- RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
-def CellSDKcgx:
- RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
-def CellSDKsfx:
- RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
-def CellSDKbg:
- RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
-def CellSDKbgx:
- RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
-
-def CellSDKmpy:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyu:
- RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-def CellSDKmpyi:
- RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyi $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpyui:
- RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyui $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpya:
- RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB),
- (v8i16 VECREG:$rC)))]>;
-
-def CellSDKmpyh:
- RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpys:
- RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpys $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhh:
- RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhha:
- RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhha $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
-// as an intrinsic for the time being
-def CellSDKmpyhhu:
- RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhhau:
- RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKand:
- RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "and\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandc:
- RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "andc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandbi:
- RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "andbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKandhi:
- RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKandi:
- RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "or\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorc:
- RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "orc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "orbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "orhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKxor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "xor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKxorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "xorbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKxorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xorhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKxori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKnor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKnand:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nand\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-//===----------------------------------------------------------------------===//
-// Shift/rotate intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKshli:
- Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
- (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqbi:
- Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
- (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqbii:
- Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
- (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqby:
- Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
- (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqbyi:
- Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
- (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
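-
-// Note: unlike the RRForm/RI10Form definitions above, these shift/rotate
-// intrinsics are plain Pat<> rewrites onto instructions defined elsewhere
-// (SPUInstrInfo.td), so they introduce no new encodings; e.g.
-// (int_spu_si_shli v, 4) simply selects to (SHLIv4i32 v, 4).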
-
-
-//===----------------------------------------------------------------------===//
-// Branch/compare intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKceq:
- RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceq\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKceqi:
- RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKceqb:
- RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKceqbi:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "ceqbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKceqh:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqh\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKceqhi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-def CellSDKcgth:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKcgthi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKcgt:
- RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKcgti:
- RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKcgtb:
- RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKcgtbi:
- RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "cgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKclgth:
- RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKclgthi:
- RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKclgt:
- RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKclgti:
- RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKclgtb:
- RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKclgtbi:
- RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "clgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-//===----------------------------------------------------------------------===//
-// Floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKfa:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fa\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfs:
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fs\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfm:
- RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfceq:
- RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fceq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcgt:
- RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmeq:
- RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmeq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmgt:
- RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfma:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfnms:
- RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfms:
- RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-//===----------------------------------------------------------------------===//
-// Double precision floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKdfa:
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfs:
- RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfm:
- RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfma:
- RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnma:
- RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnms:
- RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfms:
- RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 0027bdbf6ca1..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_library(LLVMCellSPUDesc
- SPUMCTargetDesc.cpp
- SPUMCAsmInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
deleted file mode 100644
index 4bad37eacaf7..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definitions of the SPUMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCAsmInfo.h"
-using namespace llvm;
-
-void SPULinuxMCAsmInfo::anchor() { }
-
-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
- IsLittleEndian = false;
-
- ZeroDirective = "\t.space\t";
- Data64bitsDirective = "\t.quad\t";
- AlignmentIsInBytes = false;
-
- PCSymbol = ".";
- CommentString = "#";
- GlobalPrefix = "";
- PrivateGlobalPrefix = ".L";
-
- // Has leb128
- HasLEB128 = true;
-
- SupportsDebugInformation = true;
-
- // Exception handling is not supported on CellSPU (think about it: you only
- // have 256K for code+data. Would you support exception handling?)
- ExceptionsType = ExceptionHandling::None;
-
- // SPU assembly requires ".section" before ".bss"
- UsesELFSectionDirectiveForBSS = true;
-}
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
deleted file mode 100644
index 8450e2c6634c..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Cell SPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCTargetDesc.h"
-#include "SPUMCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "SPUGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createSPUMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitSPUMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitSPUMCRegisterInfo(X, SPU::R0);
- return X;
-}
-
-static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
- MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
-
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- MAI->addInitialFrameState(0, Dst, Src);
-
- return MAI;
-}
-
-static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
- createSPUMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
- createCellSPUMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
- createSPUMCSubtargetInfo);
-}
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
deleted file mode 100644
index d26449e8908f..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides CellSPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
-
-namespace llvm {
-class Target;
-
-extern Target TheCellSPUTarget;
-
-} // End llvm namespace
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-// Defines symbolic names for the SPU instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "SPUGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
deleted file mode 100644
index d7a8247f5702..000000000000
--- a/lib/Target/CellSPU/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMCellSPUCodeGen
-TARGET = SPU
-BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \
- SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
- SPUGenDAGISel.inc \
- SPUGenSubtargetInfo.inc SPUGenCallingConv.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
deleted file mode 100644
index 3bce9609bfef..000000000000
--- a/lib/Target/CellSPU/README.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
-
-This code was contributed by a team from the Computer Systems Research
-Department in The Aerospace Corporation:
-
-- Scott Michel (head bottle washer and much of the non-floating point
- instructions)
-- Mark Thomas (floating point instructions)
-- Michael AuYeung (intrinsics)
-- Chandler Carruth (LLVM expertise)
-- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
-
-Some minor fixes added by Kalle Raiskila.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
-OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES
-OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING
-OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT
-LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR
-REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL,
-OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR
-SUCH DAMAGES ARE FORESEEABLE.
-
----------------------------------------------------------------------------
---WARNING--:
---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code.
---WARNING--:
-
-If you are brave enough to try this code or help to hack on it, be sure
-to add 'spu' to configure's --enable-targets option, e.g.:
-
- ./configure <your_configure_flags_here> \
- --enable-targets=x86,x86_64,powerpc,spu
-
----------------------------------------------------------------------------
-
-TODO:
-* In commit r142152 vector legalization was set to element promotion by
- default. This breaks half vectors (e.g. v2i32) badly, as they get
- element-promoted to much slower types (v2i64).
-
-* Many CellSPU specific codegen tests only grep & count the number of
- instructions, not checking their place with FileCheck. There have also
- been some commits that change the CellSPU checks, some of which might
- not have been thoroughly scrutinized w.r.t. the changes they cause in
- SPU assembly (especially since about the time of r142152).
-
-* Some of the i64 math operations have huge tablegen rules, which sometimes
- cause tablegen to run out of memory. See e.g. bug 8850. i64 arithmetic
- should probably be done with libraries.
-
-* Create a machine pass for performing dual-pipeline scheduling specifically
- for CellSPU, and insert branch prediction instructions as needed.
-
-* i32 instructions:
-
- * i32 division (work-in-progress)
-
-* i64 support (see i64operations.c test harness):
-
- * shifts and comparison operators: done
- * sign and zero extension: done
- * addition: done
- * subtraction: needed
- * multiplication: done
-
-* i128 support:
-
- * zero extension, any extension: done
- * sign extension: done
- * arithmetic operators (add, sub, mul, div): needed
- * logical operations (and, shl, srl, sra, xor, nor, nand): needed
- * or: done
-
-* f64 support
-
- * Comparison operators:
- SETOEQ unimplemented
- SETOGT unimplemented
- SETOGE unimplemented
- SETOLT unimplemented
- SETOLE unimplemented
- SETONE unimplemented
- SETO done (lowered)
- SETUO done (lowered)
- SETUEQ unimplemented
- SETUGT unimplemented
- SETUGE unimplemented
- SETULT unimplemented
- SETULE unimplemented
- SETUNE unimplemented
-
-* LLVM vector support
-
- * VSETCC needs to be implemented; it's pretty straightforward to code.
-
-* Intrinsics
-
- * spu.h intrinsics added but not tested. Need to have an operational
- llvm-spu-gcc in order to write a unit test harness.
-
-===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
deleted file mode 100644
index c660131706cb..000000000000
--- a/lib/Target/CellSPU/SPU.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Cell SPU back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_IBMCELLSPU_H
-#define LLVM_TARGET_IBMCELLSPU_H
-
-#include "MCTargetDesc/SPUMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
-
-}
-
-#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
deleted file mode 100644
index e835b9cac8e1..000000000000
--- a/lib/Target/CellSPU/SPU.td
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the top level entry point for the STI Cell SPU target machine.
-//
-//===----------------------------------------------------------------------===//
-
-// Get the target-independent interfaces which we are implementing.
-//
-include "llvm/Target/Target.td"
-
-// Holder of code fragments (you'd think this'd already be in
-// a td file somewhere... :-)
-
-class CodeFrag<dag frag> {
- dag Fragment = frag;
-}
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "SPURegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction formats, instructions
-//===----------------------------------------------------------------------===//
-
-include "SPUNodes.td"
-include "SPUOperands.td"
-include "SPUSchedule.td"
-include "SPUInstrFormats.td"
-include "SPUInstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget features:
-//===----------------------------------------------------------------------===//
-
-def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
-def LargeMemFeature:
- SubtargetFeature<"large_mem","UseLargeMem", "true",
- "Use large (>256K) LSA memory addressing [default = false]">;
-
-def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
-
-//===----------------------------------------------------------------------===//
-// Calling convention:
-//===----------------------------------------------------------------------===//
-
-include "SPUCallingConv.td"
-
-// Target:
-
-def SPUInstrInfo : InstrInfo {
- let isLittleEndianEncoding = 1;
-}
-
-def SPU : Target {
- let InstructionSet = SPUInstrInfo;
-}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
deleted file mode 100644
index e051e047333a..000000000000
--- a/lib/Target/CellSPU/SPU128InstrInfo.td
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
-//
-// Cell SPU 128-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-// zext 32->128: Zero extend 32-bit to 128-bit
-def : Pat<(i128 (zext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// zext 64->128: Zero extend 64-bit to 128-bit
-def : Pat<(i128 (zext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// zext 16->128: Zero extend 16-bit to 128-bit
-def : Pat<(i128 (zext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// zext 8->128: Zero extend 8-bit to 128-bit
-def : Pat<(i128 (zext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
-
-// anyext 32->128: Any extend 32-bit to 128-bit (implemented as zero extension)
-def : Pat<(i128 (anyext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// anyext 64->128: Any extend 64-bit to 128-bit (implemented as zero extension)
-def : Pat<(i128 (anyext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// anyext 16->128: Any extend 16-bit to 128-bit (implemented as zero extension)
-def : Pat<(i128 (anyext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// anyext 8->128: Any extend 8-bit to 128-bit (implemented as zero extension)
-def : Pat<(i128 (anyext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
-
-// Shift left
-def : Pat<(shl GPRC:$rA, R32C:$rB),
- (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
deleted file mode 100644
index bea33b5362d2..000000000000
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ /dev/null
@@ -1,408 +0,0 @@
-//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
-//
-// Cell SPU 64-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// 64-bit comparisons:
-//
-// 1. The instruction sequences for vector vs. scalar differ by a
-// constant. In the scalar case, we're only interested in the
-// top two 32-bit slots, whereas we're interested in an exact
-// all-four-slot match in the vector case.
-//
-// 2. There are no "immediate" forms, since loading 64-bit constants
-// could be a constant pool load.
-//
-// 3. i64 setcc results are i32, which are subsequently converted to a FSM
-// mask when used in a select pattern.
-//
-// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
-// [Note: this may be moot, since gb produces v4i32 or r32.]
-//
-// 5. The code sequences for r64 and v2i64 are probably overly conservative,
-// compared to the code that gcc produces.
-//
-// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for i64. Note that the selection mask is
-// a vector, produced by various forms of FSM:
-def SELBr64_cond:
- SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-// The generic i64 select pattern, which assumes that the comparison result
-// is in a 32-bit register that contains a select mask pattern (i.e., gather
-// bits result):
-
-def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
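-
-// (Mechanics sketch: FSMr32 fans each of the four low bits of the i32
-// condition out across a full 32-bit slot, producing the per-bit mask
-// that SELB uses to pick between $rTrue and $rFalse.)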
-
-// select the negative condition:
-class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
-
-// setcc the negative condition:
-class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(cond R64C:$rA, R64C:$rB),
- (XORIr32 compare.Fragment, -1)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// The i64 seteq fragment that does the scalar->vector conversion and
-// comparison:
-def CEQr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
-
-// The i64 seteq fragment that does the vector comparison
-def CEQv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
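-
-// Roughly: CEQv4i32 sets each equal 32-bit slot to all-ones, GBv4i32
-// gathers the LSB of each slot into a 4-bit value, and a scalar i64
-// occupies slots 0-1, so "both high slots equal" means the gathered value
-// is 0b11xx, i.e. greater than 0xb; the vector form needs all four slots
-// to match, hence the equality test against 0xf.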
-
-// i64 seteq (equality): the setcc result is i32, which is converted to a
-// vector FSM mask when used in a select pattern.
-//
-// v2i64 seteq (equality): the setcc result is v4i32
-multiclass CompareEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
-}
-
-defm I64EQ: CompareEqual64;
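-
-// (The defm above expands to I64EQr64, I64EQv2i64, I64EQr64mask and
-// I64EQv2i64mask; their .Fragment members are what the patterns below and
-// the setcc/select helper classes consume.)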
-
-def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
-def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
-
-// i64 setne:
-def : I64SETCCNegCond<setne, I64EQr64>;
-def : I64SELECTNegCond<setne, I64EQr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setugt/setule:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGTr64ugt:
- CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64compare:
- CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
- (XSWDv2i64 CLGTr64ugt.Fragment),
- CLGTr64eq.Fragment)>;
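-
-// I.e. ugt64 = ugt(hi) | (eq(hi) & ugt(lo)), built from 32-bit pieces:
-// XSWD propagates each low word's CLGT result across its doubleword, and
-// SELB substitutes that wherever the corresponding high words compared
-// equal.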
-
-def CLGTv2i64ugt:
- CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64compare:
- CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
- (XSWDv2i64 CLGTv2i64ugt.Fragment),
- CLGTv2i64eq.Fragment)>;
-
-multiclass CompareLogicalGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64LGT: CompareLogicalGreaterThan64;
-
-def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
-//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64LGTv2i64.Fragment>;
-
-// i64 setule:
-def : I64SETCCNegCond<setule, I64LGTr64>;
-def : I64SELECTNegCond<setule, I64LGTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setuge/setult:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
- CLGTr64eq.Fragment)), 0xb)>;
-
-def CLGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
- CLGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareLogicalGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64LGE: CompareLogicalGreaterEqual64;
-
-def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
-def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64LGEv2i64.Fragment>;
-
-
-// i64 setult:
-def : I64SETCCNegCond<setult, I64LGEr64>;
-def : I64SELECTNegCond<setult, I64LGEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setgt/setle:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGTr64sgt:
- CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64compare:
- CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
- (XSWDv2i64 CGTr64sgt.Fragment),
- CGTr64eq.Fragment)>;
-
-def CGTv2i64sgt:
- CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64compare:
- CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
- (XSWDv2i64 CGTv2i64sgt.Fragment),
- CGTv2i64eq.Fragment)>;
-
-multiclass CompareGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64GT: CompareGreaterThan64;
-
-def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
-//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64GTv2i64.Fragment>;
-
-// i64 setle:
-def : I64SETCCNegCond<setle, I64GTr64>;
-def : I64SELECTNegCond<setle, I64GTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setge/setlt:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
- CGTr64eq.Fragment)), 0xb)>;
-
-def CGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
- CGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64GE: CompareGreaterEqual64;
-
-def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
-def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64GEv2i64.Fragment>;
-
-// i64 setlt:
-def : I64SETCCNegCond<setlt, I64GEr64>;
-def : I64SELECTNegCond<setlt, I64GEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 add
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_add_cg<dag lhs, dag rhs>:
- CodeFrag<(CGv4i32 lhs, rhs)>;
-
-class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
- CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
-
-class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
- v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
-
-def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_add<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
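-
-// Sketch of the sequence: CGv4i32 computes a carry-out for every 32-bit
-// slot, the SHUFB driven by $rCGmask moves each low word's carry into its
-// high word's slot, and ADDXv4i32 adds with that carry-in, giving a full
-// 64-bit add per doubleword.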
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 subtraction
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
-
-class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
- CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
-
-def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS
- v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_sub<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- v2i64_sub_bg<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment>;
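-
-// Same shape as the add above, except that BGv4i32 produces a per-slot
-// borrow and SFXv4i32 (subtract-from extended) consumes it.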
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 multiply
-//
-// Note: i64 multiply is simply the vector->scalar conversion of the
-// full-on v2i64 multiply, since the entire vector has to be manipulated
-// anyway.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_mul_ahi64<dag rA> :
- CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_bhi64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_alo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_blo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_ashlq2<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
-
-class v2i64_mul_ashlq4<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
-
-class v2i64_mul_bshlq2<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
-
-class v2i64_mul_bshlq4<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
-
-class v2i64_highprod<dag rA, dag rB>:
- CodeFrag<(Av4i32
- (Av4i32
- (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
- v2i64_mul_ahi64<rA>.Fragment),
- (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
- v2i64_mul_bshlq4<rB>.Fragment)),
- (Av4i32
- (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
- v2i64_mul_ashlq4<rA>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment),
- (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment))))))>;
-
-class v2i64_mul_a3_b3<dag rA, dag rB>:
- CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_blo64<rB>.Fragment)>;
-
-class v2i64_mul_a2_b3<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_mul_a3_b2<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
- v2i64_mul_ashlq2<rA>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
- v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
- v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
-
-class v2i64_mul<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
- (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
- (ILv4i32 0),
- (FSMBIv4i32 0x0f0f)),
- rCGmask>;
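-
-// Sketch of the decomposition: v2i64_lowsum accumulates the low-order
-// 16x16 partial products (a3*b3 plus the byte-shifted a2*b3 and a3*b2
-// cross terms), v2i64_highprod collects the remaining high-order terms,
-// and the FSMBI/SELB masks splice the two halves into each doubleword
-// lane before the final carried add.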
-
-def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f64 comparisons
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for f64. Note that the selection mask here
-// is carried in an i32 register rather than a vector:
-def SELBf64_cond:
- SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
- [(set R64FP:$rT,
- (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
deleted file mode 100644
index 3396e8b1ef39..000000000000
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ /dev/null
@@ -1,333 +0,0 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to Cell SPU assembly language. This printer
-// is the output mechanism used by `llc'.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asmprinter"
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class SPUAsmPrinter : public AsmPrinter {
- public:
- explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
- AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "STI CBEA SPU Assembly Printer";
- }
-
- /// printInstruction - This method is automatically generated by tablegen
- /// from the instruction set description.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- void printOp(const MachineOperand &MO, raw_ostream &OS);
-
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- if (MO.isReg()) {
- O << getRegisterName(MO.getReg());
- } else if (MO.isImm()) {
- O << MO.getImm();
- } else {
- printOp(MO, O);
- }
- }
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
-
-
- void
- printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value < (1 << 7) && "Invalid u7 argument");
- O << value;
- }
-
- void
- printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- char value = MI->getOperand(OpNo).getImm();
- O << (int) value;
- O << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (short) MI->getOperand(OpNo).getImm();
- }
-
- void
- printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (unsigned short)MI->getOperand(OpNo).getImm();
- }
-
- void
- printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // When used as the base register, r0 reads constant zero rather than
- // the value contained in the register. For this reason, the darwin
- // assembler requires that we print r0 as 0 (no r) when used as the base.
- const MachineOperand &MO = MI->getOperand(OpNo);
- O << getRegisterName(MO.getReg()) << ", ";
- printOperand(MI, OpNo+1, O);
- }
-
- void
- printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
- O << value;
- }
-
- void
- printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
- && "Invalid s10 argument");
- O << value;
- }
-
- void
- printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value <= (1 << 10) - 1) && "Invalid u10 argument");
- O << value;
- }
-
- void
- printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- assert(MI->getOperand(OpNo).isImm() &&
- "printDFormAddr first operand is not immediate");
- int64_t value = int64_t(MI->getOperand(OpNo).getImm());
- int16_t value16 = int16_t(value);
- assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
- && "Invalid dform s10 offset argument");
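- // D-form offsets are quantized to multiples of 16; masking with ~0xf
- // drops the low four bits (e.g. an offset of 35 prints as 32).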
- O << (value16 & ~0xf) << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- /* Note: operand 1 is an offset or symbol name. */
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- if (MI->getOperand(OpNo+1).isImm()) {
- int displ = int(MI->getOperand(OpNo+1).getImm());
- if (displ > 0)
- O << "+" << displ;
- else if (displ < 0)
- O << displ;
- }
- }
- }
-
- void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // Used to generate a ".-<target>", but it turns out that the assembler
- // really wants the target.
- //
- // N.B.: This operand is used for call targets. Branch hints are another
- // animal entirely.
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@h";
- }
- }
-
- void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@l";
- }
- }
-
- /// Print local store address
- void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value < 16)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- } else {
- llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- }
- }
-
- void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
- assert(MI->getOperand(OpNo).isImm() &&
- "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value <= 32)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- }
- };
-} // end of anonymous namespace
-
-// Include the auto-generated portion of the assembly writer
-#include "SPUGenAsmWriter.inc"
-
-void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- report_fatal_error("printOp() does not handle immediate values");
-
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ExternalSymbol:
- // Computing the address of an external symbol, not calling it.
- if (TM.getRelocationModel() != Reloc::Static) {
- O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
- << "$non_lazy_ptr";
- return;
- }
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- return;
- case MachineOperand::MO_GlobalAddress:
- // External or weakly linked global variables need non-lazily-resolved
- // stubs
- if (TM.getRelocationModel() != Reloc::Static) {
- const GlobalValue *GV = MO.getGlobal();
- if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
- O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- return;
- }
- }
- O << *Mang->getSymbol(MO.getGlobal());
- return;
- case MachineOperand::MO_MCSymbol:
- O << *(MO.getMCSymbol());
- return;
- default:
- O << "<unknown operand type: " << MO.getType() << ">";
- return;
- }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- // Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default:
- // See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'L': // Write second word of DImode reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNo).isReg() ||
- OpNo+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNo+1).isReg())
- return true;
- ++OpNo; // Return the high-part.
- break;
- }
- }
-
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
- printMemRegReg(MI, OpNo, O);
- return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUAsmPrinter() {
- RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
-}
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
deleted file mode 100644
index 9bc6be79860b..000000000000
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the STI Cell SPU architecture.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-
-// Return-value convention for Cell SPU: return values are passed in registers R3-R74
-def RetCC_SPU : CallingConv<[
- CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
-]>;
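-
-// (All value types share the same R3-R74 sequence because the SPU has a
-// single unified 128-bit register file; scalars occupy the preferred slot
-// of a full register.)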
-
-
-//===----------------------------------------------------------------------===//
-// CellSPU Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CCC_SPU : CallingConv<[
- CCIfType<[i8, i16, i32, i64, i128, f32, f64,
- v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>
-]>;
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
deleted file mode 100644
index f01199515a11..000000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-//===-- SPUFrameLowering.cpp - SPU Frame Lowering impl --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Frame lowering implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUFrameLowering.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameLowering:
-//===----------------------------------------------------------------------===//
-
-SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
- Subtarget(sti) {
- LR[0].first = SPU::R0;
- LR[0].second = 16;
-}
-
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- return MFI->getStackSize() &&
- (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects());
-}
-
-
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = getStackAlignment();
- unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
- assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
- unsigned AlignMask = Align - 1;
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
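// [Editor's note: illustrative sketch, not part of the original tree.] The
// (FrameSize + AlignMask) & ~AlignMask idiom above rounds a size up to the
// next multiple of a power-of-two alignment:
//   Align = 16 (AlignMask = 15):  40 -> (40 + 15) & ~15 = 48
//                                 48 -> (48 + 15) & ~15 = 48
static unsigned roundUpToAlignment(unsigned Size, unsigned Align) {
  unsigned Mask = Align - 1;               // Align must be a power of two
  return (Size + Mask) & ~Mask;
}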
-
-void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Prepare for debug frame info.
- bool hasDebugInfo = MMI.hasDebugInfo();
- MCSymbol *FrameLabel = 0;
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- int FrameSize = MFI->getStackSize();
-
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
- if (hasDebugInfo) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
- }
-
- // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
- // for the ABI
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
- .addReg(SPU::R1);
- if (isInt<10>(FrameSize)) {
- // Spill $sp to adjusted $sp
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
- .addReg(SPU::R1);
-      // Adjust $sp by the required amount
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (isInt<16>(FrameSize)) {
-      // Frame size can be loaded into ILr32, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(-16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
-
- if (hasDebugInfo) {
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Show update of SP.
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == SPU::R0) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
-
- // Mark effective beginning of when frame pointer is ready.
- MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(SPU::R1);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- }
-}
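// [Editor's note: illustrative sketch, not part of the original tree.] The
// isInt<10>/isInt<16> guards above test whether the negated frame size fits
// the signed immediate field of AIr32/ILr32. Spelled out without the LLVM
// helpers:
#include <cstdint>
static bool fitsSigned10(int64_t V) { return V >= -512 && V <= 511; }
static bool fitsSigned16(int64_t V) { return V >= -32768 && V <= 32767; }
// e.g. a 496-byte frame gives FrameSize = -(496 + 32) = -528, which fails
// fitsSigned10 and falls into the ILr32-based path.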
-
-void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int FrameSize = MFI->getStackSize();
- int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
- DebugLoc dl = MBBI->getDebugLoc();
-
- assert(MBBI->getOpcode() == SPU::RET &&
- "Can only insert epilog into returning blocks");
- assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = FrameSize + SPUFrameLowering::minStackSize();
- if (isInt<10>(FrameSize + LinkSlotOffset)) {
- // Reload $lr, adjust $sp by required amount
-      // Note: We reload $lr before adjusting $sp so the two instructions get
-      // a chance to dual issue. It doesn't help much, but it's an opportunity.
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(FrameSize + LinkSlotOffset)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
- .addReg(SPU::R1)
- .addImm(FrameSize);
-    } else if (isInt<16>(FrameSize)) {
-      // Frame size can be loaded into ILr32, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
- addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
- }
-}
-
-void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const{
- // Mark LR and SP unused, since the prolog spills them to stack and
- // we don't want anyone else to spill them for us.
- //
- // Also, unless R2 is really used someday, don't spill it automatically.
- MF.getRegInfo().setPhysRegUnused(SPU::R0);
- MF.getRegInfo().setPhysRegUnused(SPU::R1);
- MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &SPU::R32CRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
-}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
deleted file mode 100644
index 11c52818dd9c..000000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ /dev/null
@@ -1,80 +0,0 @@
-//===-- SPUFrameLowering.h - SPU Frame Lowering declarations ---*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains CellSPU frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_FRAMEINFO_H
-#define SPU_FRAMEINFO_H
-
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUSubtarget;
-
- class SPUFrameLowering: public TargetFrameLowering {
- const SPUSubtarget &Subtarget;
- std::pair<unsigned, int> LR[1];
-
- public:
- SPUFrameLowering(const SPUSubtarget &sti);
-
-    //! Determine the frame's layout
- void determineFrameLayout(MachineFunction &MF) const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
-    //! Predicate: Target has a dedicated frame pointer
- bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- //! Return a function's saved spill slots
- /*!
-      For CellSPU, a function's only saved spill slot is the link register.
- */
- const std::pair<unsigned, int> *
- getCalleeSaveSpillSlots(unsigned &NumEntries) const;
-
- //! Stack slot size (16 bytes)
- static int stackSlotSize() {
- return 16;
- }
- //! Maximum frame offset representable by a signed 10-bit integer
- /*!
-      This is the maximum frame offset that can be expressed as a signed,
-      scaled 10-bit integer, as used in D-form addresses.
- */
- static int maxFrameOffset() {
- return ((1 << 9) - 1) * stackSlotSize();
- }
- //! Minimum frame offset representable by a signed 10-bit integer
- static int minFrameOffset() {
- return -(1 << 9) * stackSlotSize();
- }
- //! Minimum frame size (enough to spill LR + SP)
- static int minStackSize() {
- return (2 * stackSlotSize());
- }
- //! Convert frame index to stack offset
- static int FItoStackOffset(int frame_index) {
- return frame_index * stackSlotSize();
- }
- };
-}
-
-#endif
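// [Editor's note: illustrative sketch, not part of the original tree.]
// Worked values for the static helpers above, given the 16-byte slot size
// they hard-code:
//   maxFrameOffset() = ((1 << 9) - 1) * 16 = 511 * 16  =  8176
//   minFrameOffset() = -(1 << 9) * 16      = -512 * 16 = -8192
//   minStackSize()   = 2 * 16 = 32 (one slot each for LR and SP)
static_assert(((1 << 9) - 1) * 16 == 8176, "max D-form frame offset");
static_assert(-(1 << 9) * 16 == -8192, "min D-form frame offset");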
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
deleted file mode 100644
index 67a83f16a649..000000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements hazard recognizers for scheduling on Cell SPU
-// processors.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sched"
-
-#include "SPUHazardRecognizers.h"
-#include "SPU.h"
-#include "SPUInstrInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Cell SPU hazard recognizer
-//
-// This is the pipeline hazard recognizer for the Cell SPU processor. It does
-// very little right now.
-//===----------------------------------------------------------------------===//
-
-/// Return the pipeline hazard type encountered or generated by this
-/// instruction. Currently returns NoHazard.
-///
-/// \return NoHazard
-ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
-{
- // Initial thoughts on how to do this, but this code cannot work unless the
-  // function's prolog and epilog code are also being scheduled, so that we
-  // can accurately determine which pipeline each instruction issues to.
-#if 0
- assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
-
- const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
- ScheduleHazardRecognizer::HazardType retval = NoHazard;
- bool mustBeOdd = false;
-
- switch (Node->getOpcode()) {
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16:
- case SPU::LQAv16i8:
- case SPU::LQAv8i16:
- case SPU::LQAv4i32:
- case SPU::LQAv4f32:
- case SPU::LQAv2f64:
- case SPU::LQAr128:
- case SPU::LQAr64:
- case SPU::LQAr32:
- case SPU::LQXv4i32:
- case SPU::LQXr128:
- case SPU::LQXr64:
- case SPU::LQXr32:
- case SPU::LQXr16:
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8:
- case SPU::STQAv16i8:
- case SPU::STQAv8i16:
- case SPU::STQAv4i32:
- case SPU::STQAv4f32:
- case SPU::STQAv2f64:
- case SPU::STQAr128:
- case SPU::STQAr64:
- case SPU::STQAr32:
- case SPU::STQAr16:
- case SPU::STQAr8:
- case SPU::STQXv16i8:
- case SPU::STQXv8i16:
- case SPU::STQXv4i32:
- case SPU::STQXv4f32:
- case SPU::STQXv2f64:
- case SPU::STQXr128:
- case SPU::STQXr64:
- case SPU::STQXr32:
- case SPU::STQXr16:
- case SPU::STQXr8:
- case SPU::RET:
- mustBeOdd = true;
- break;
- default:
- // Assume that this instruction can be on the even pipe
- break;
- }
-
- if (mustBeOdd && !EvenOdd)
- retval = Hazard;
-
- DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
- << retval << "\n");
- EvenOdd ^= 1;
- return retval;
-#else
- return NoHazard;
-#endif
-}
-
-void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
-{
-}
-
-void SPUHazardRecognizer::AdvanceCycle()
-{
- DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
-}
-
-void SPUHazardRecognizer::EmitNoop()
-{
- AdvanceCycle();
-}
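// [Editor's note: illustrative sketch, not part of the original tree.] The
// disabled block above alternates an even/odd pipe flag per instruction and
// would flag a hazard whenever an odd-pipe-only instruction (the loads,
// stores and RET listed there) lands on the even slot. A stripped-down model
// of that bookkeeping:
struct PipeParityModel {
  bool OddSlot = false;          // issue slot assigned to the next instruction
  bool issueAndCheck(bool MustBeOdd) {
    bool Hazard = MustBeOdd && !OddSlot;
    OddSlot = !OddSlot;          // EvenOdd ^= 1 in the original
    return Hazard;
  }
};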
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
deleted file mode 100644
index 30acaeaa36fb..000000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines hazard recognizers for scheduling on the Cell SPU
-// processor.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUHAZRECS_H
-#define SPUHAZRECS_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-
-namespace llvm {
-
-class TargetInstrInfo;
-
-/// SPUHazardRecognizer
-class SPUHazardRecognizer : public ScheduleHazardRecognizer
-{
-public:
- SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {}
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void EmitNoop();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
deleted file mode 100644
index 5d5061054b08..000000000000
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ /dev/null
@@ -1,1192 +0,0 @@
-//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pattern matching instruction selector for the Cell SPU,
-// converting from a legalized dag to a SPU-target dag.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "SPUFrameLowering.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
- isI32IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
- bool
- isI32IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
- bool
- isI16IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>((short) CN->getZExtValue());
- }
-
- //! ConstantSDNode predicate for signed 16-bit values
- /*!
- \param CN The constant SelectionDAG node holding the value
- \param Imm The returned 16-bit value, if returning true
-
- This predicate tests the value in \a CN to see whether it can be
- represented as a 16-bit, sign-extended quantity. Returns true if
- this is the case.
- */
- bool
- isIntS16Immediate(ConstantSDNode *CN, short &Imm)
- {
- EVT vt = CN->getValueType(0);
- Imm = (short) CN->getZExtValue();
- if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
- return true;
- } else if (vt == MVT::i32) {
- int32_t i_val = (int32_t) CN->getZExtValue();
- return i_val == SignExtend32<16>(i_val);
- } else {
- int64_t i_val = (int64_t) CN->getZExtValue();
- return i_val == SignExtend64<16>(i_val);
- }
- }
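// [Editor's note: illustrative sketch, not part of the original tree.] The
// predicate above uses the identity "V fits in N signed bits iff
// sign-extending V's low N bits reproduces V". For N = 16, without the LLVM
// SignExtend32 helper:
#include <cstdint>
static bool fitsSigned16Bits(int32_t V) {
  return int32_t(int16_t(V)) == V;   // truncate to 16 bits, sign-extend back
}
// fitsSigned16Bits(0x7fff) -> true; fitsSigned16Bits(0x8000) -> false.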
-
- //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
- static bool
- isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
- {
- EVT vt = FPN->getValueType(0);
- if (vt == MVT::f32) {
- int val = FloatToBits(FPN->getValueAPF().convertToFloat());
- if (val == SignExtend32<16>(val)) {
- Imm = (short) val;
- return true;
- }
- }
-
- return false;
- }
-
- //! Generate the carry-generate shuffle mask.
- SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
-    // Create the shuffle mask for "rotating" the carry up one register slot
-    // once the carry is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
-
- //! Generate the borrow-generate shuffle mask
- SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
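// [Editor's note, assuming the SPU shufb encoding.] Each control byte of the
// masks above selects one result byte: low values index into the concatenated
// inputs (0x04050607 copies input bytes 4-7, i.e. word 1, into the result),
// while bytes of the form 10xxxxxx (the 0x80808080 words) produce 0x00 and
// 110xxxxx (the 0xc0c0c0c0 words) produce 0xff. The net effect is to "rotate"
// the carry/borrow word up one 32-bit slot and fill the rest with 0 or ~0.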
-
- //===------------------------------------------------------------------===//
- /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
- /// instructions for SelectionDAG operations.
- ///
- class SPUDAGToDAGISel :
- public SelectionDAGISel
- {
- const SPUTargetMachine &TM;
- const SPUTargetLowering &SPUtli;
- unsigned GlobalBaseReg;
-
- public:
- explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm),
- SPUtli(*tm.getTargetLowering())
- { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- // Make sure we re-emit a set of the global base reg if necessary
- GlobalBaseReg = 0;
- SelectionDAGISel::runOnMachineFunction(MF);
- return true;
- }
-
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint32_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- /// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
-
- SDNode *emitBuildVector(SDNode *bvNode) {
- EVT vecVT = bvNode->getValueType(0);
- DebugLoc dl = bvNode->getDebugLoc();
-
- // Check to see if this vector can be represented as a CellSPU immediate
- // constant by invoking all of the instruction selection predicates:
- if (((vecVT == MVT::v8i16) &&
- (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
- ((vecVT == MVT::v4i32) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
- ((vecVT == MVT::v2i64) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
- HandleSDNode Dummy(SDValue(bvNode, 0));
- if (SDNode *N = Select(bvNode))
- return N;
- return Dummy.getValue().getNode();
- }
-
- // No, need to emit a constant pool spill:
- std::vector<Constant*> CV;
-
- for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
- ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
- }
-
- const Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue CGPoolOffset =
- SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-
- HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment));
- CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- }
-
- /// Select - Convert the specified operand from a target-independent to a
- /// target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
-
- //! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
-
- //! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
-
- //! Alternate instruction emit sequence for loading i64 constants
- SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
-
- //! Returns true if the address N is an A-form (local store) address
- bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- //! D-form address predicate
- bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// Alternate D-form address using i7 offset predicate
- bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base);
-
- /// D-form address selection workhorse
- bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base, int minOffset, int maxOffset);
-
- //! Address predicate if N can be expressed as an indexed [r+r] operation.
- bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
- SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
- break;
- case 'o': // offsetable
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
- Op0 = Op;
- Op1 = getSmallIPtrImm(0);
- }
- break;
- case 'v': // not offsetable
-#if 1
- llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
-#else
- SelectAddrIdxOnly(Op, Op, Op0, Op1);
- break;
-#endif
- }
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
- }
-
- virtual const char *getPassName() const {
- return "Cell SPU DAG->DAG Pattern Instruction Selection";
- }
-
- private:
- SDValue getRC( MVT );
-
- // Include the pieces autogenerated from the target description.
-#include "SPUGenDAGISel.inc"
- };
-}
-
-/*!
- \param Op The ISD instruction operand
- \param N The address to be tested
- \param Base The base address
- \param Index The base address index
- */
-bool
-SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- // These match the addr256k operand type:
- EVT OffsVT = MVT::i16;
- SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
- int64_t val;
-
- switch (N.getOpcode()) {
- case ISD::Constant:
- val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
- Base = CurDAG->getTargetConstant( val , MVT::i32);
- Index = Zero;
- return true;
- case ISD::ConstantPool:
- case ISD::GlobalAddress:
- report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
- /*NOTREACHED*/
-
- case ISD::TargetConstant:
- case ISD::TargetGlobalAddress:
- case ISD::TargetJumpTable:
- report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
- "not wrapped as A-form address.");
- /*NOTREACHED*/
-
- case SPUISD::AFormAddr:
- // Just load from memory if there's only a single use of the location,
- // otherwise, this will get handled below with D-form offset addresses
- if (N.hasOneUse()) {
- SDValue Op0 = N.getOperand(0);
- switch (Op0.getOpcode()) {
- case ISD::TargetConstantPool:
- case ISD::TargetJumpTable:
- Base = Op0;
- Index = Zero;
- return true;
-
- case ISD::TargetGlobalAddress: {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
- const GlobalValue *GV = GSDN->getGlobal();
- if (GV->getAlignment() == 16) {
- Base = Op0;
- Index = Zero;
- return true;
- }
- break;
- }
- }
- }
- break;
- }
- return false;
-}
-
-bool
-SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base) {
- const int minDForm2Offset = -(1 << 7);
- const int maxDForm2Offset = (1 << 7) - 1;
- return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
- maxDForm2Offset);
-}
-
-/*!
- \param Op The ISD instruction (ignored)
- \param N The address to be tested
- \param Base Base address register/pointer
- \param Index Base address index
-
- Examine the input address by a base register plus a signed 10-bit
- displacement, [r+I10] (D-form address).
-
- \return true if \a N is a D-form address with \a Base and \a Index set
- to non-empty SDValue instances.
-*/
-bool
-SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameLowering::minFrameOffset(),
- SPUFrameLowering::maxFrameOffset());
-}
-
-bool
-SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index, int minOffset,
- int maxOffset) {
- unsigned Opc = N.getOpcode();
- EVT PtrTy = SPUtli.getPointerTy();
-
- if (Opc == ISD::FrameIndex) {
-    // The frame index, scaled by the 16-byte slot size, must be below maxOffset:
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
- << FI << "\n");
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (Opc == ISD::ADD) {
- // Generated by getelementptr
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
- || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op0.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op0;
- return true;
- }
- } else if (Op0.getOpcode() == ISD::Constant
- || Op0.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op1.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op1;
- return true;
- }
- }
- } else if (Opc == SPUISD::IndirectAddr) {
- // Indirect with constant offset -> D-Form address
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::Hi
- && Op1.getOpcode() == SPUISD::Lo) {
- // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
- int32_t offset = 0;
- SDValue idxOp;
-
- if (isa<ConstantSDNode>(Op1)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op0;
- } else if (isa<ConstantSDNode>(Op0)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op1;
- }
-
- if (offset >= minOffset && offset <= maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = idxOp;
- return true;
- }
- }
- } else if (Opc == SPUISD::AFormAddr) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == SPUISD::LDRESULT) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == ISD::Register
- ||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF
- ||Opc == ISD::Constant) {
- unsigned OpOpc = Op->getOpcode();
-
- if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
- // Direct load/store without getelementptr
- SDValue Offs;
-
- Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
-
- if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
- if (Offs.getOpcode() == ISD::UNDEF)
- Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
-
- Base = Offs;
- Index = N;
- return true;
- }
- } else {
- /* If otherwise unadorned, default to D-form address with 0 offset: */
- if (Opc == ISD::CopyFromReg) {
- Index = N.getOperand(1);
- } else {
- Index = N;
- }
-
- Base = CurDAG->getTargetConstant(0, Index.getValueType());
- return true;
- }
- }
-
- return false;
-}
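// [Editor's note: illustrative sketch, not part of the original tree.] All
// the accepting paths above reduce to one question: does the displacement fit
// the D-form's signed 10-bit field? In isolation:
#include <cstdint>
static bool isDFormDisplacement(int32_t Offset, int MinOffset, int MaxOffset) {
  return Offset >= MinOffset && Offset <= MaxOffset;
}
// With the SPUFrameLowering limits this is the byte range [-8192, 8176].
// (Note the original mixes strict and inclusive comparisons across paths.)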
-
-/*!
- \param Op The ISD instruction operand
- \param N The address operand
- \param Base The base pointer operand
- \param Index The offset/index operand
-
- If the address \a N can be expressed as an A-form or D-form address, returns
- false. Otherwise, creates two operands, Base and Index that will become the
- (r)(r) X-form address.
-*/
-bool
-SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- if (!SelectAFormAddr(Op, N, Base, Index)
- && !SelectDFormAddr(Op, N, Base, Index)) {
-    // If the address is neither A-form nor D-form, punt and use an X-form
- // address:
- Base = N.getOperand(1);
- Index = N.getOperand(0);
- return true;
- }
-
- return false;
-}
-
-/*!
-  Utility function to use with COPY_TO_REGCLASS instructions. Returns an
-  SDValue to be used as the last parameter of a
-  CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) function call.
- \param VT the value type for which we want a register class
-*/
-SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
- switch( VT.SimpleTy ) {
- case MVT::i8:
- return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
- case MVT::i16:
- return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
- case MVT::i32:
- return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
- case MVT::f32:
- return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
- case MVT::i64:
- return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
- case MVT::i128:
- return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v2f64:
- return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
- default:
- assert( false && "add a new case here" );
- return SDValue();
- }
-}
-
-//! Convert the operand from a target-independent to a target-specific node
-/*!
- */
-SDNode *
-SPUDAGToDAGISel::Select(SDNode *N) {
- unsigned Opc = N->getOpcode();
- int n_ops = -1;
- unsigned NewOpc = 0;
- EVT OpVT = N->getValueType(0);
- SDValue Ops[8];
- DebugLoc dl = N->getDebugLoc();
-
- if (N->isMachineOpcode())
- return NULL; // Already selected.
-
- if (Opc == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
- SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
-
- if (FI < 128) {
- NewOpc = SPU::AIr32;
- Ops[0] = TFI;
- Ops[1] = Imm0;
- n_ops = 2;
- } else {
- NewOpc = SPU::Ar32;
- Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- N->getValueType(0), TFI),
- 0);
- n_ops = 2;
- }
- } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
- // Catch the i64 constants that end up here. Note: The backend doesn't
- // attempt to legalize the constant (it's useless because DAGCombiner
- // will insert 64-bit constants and we can't stop it).
- return SelectI64Constant(N, OpVT, N->getDebugLoc());
- } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
- && OpVT == MVT::i64) {
- SDValue Op0 = N->getOperand(0);
- EVT Op0VT = Op0.getValueType();
- EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- Op0VT, (128 / Op0VT.getSizeInBits()));
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue shufMask;
-
- switch (Op0VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
- /*NOTREACHED*/
- case MVT::i32:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x00010203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x08090a0b, MVT::i32));
- break;
-
- case MVT::i16:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800a0b, MVT::i32));
- break;
-
- case MVT::i8:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80808003, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x8080800b, MVT::i32));
- break;
- }
-
- SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-
- HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
- Op0VecVT, Op0));
-
- SDValue PromScalar;
- if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
- PromScalar = SDValue(N, 0);
- else
- PromScalar = PromoteScalar.getValue();
-
- SDValue zextShuffle =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- PromScalar, PromScalar,
- SDValue(shufMaskLoad, 0));
-
- HandleSDNode Dummy2(zextShuffle);
- if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
- zextShuffle = SDValue(N, 0);
- else
- zextShuffle = Dummy2.getValue();
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- SelectCode(Dummy.getValue().getNode());
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::TRUNCATE) {
- SDValue Op0 = N->getOperand(0);
- if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
- && OpVT == MVT::i32
- && Op0.getValueType() == MVT::i64) {
-      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32.
- //
- // Take advantage of the fact that the upper 32 bits are in the
- // i32 preferred slot and avoid shuffle gymnastics:
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
- if (CN != 0) {
- unsigned shift_amt = unsigned(CN->getZExtValue());
-
- if (shift_amt >= 32) {
- SDNode *hi32 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- Op0.getOperand(0), getRC(MVT::i32));
-
- shift_amt -= 32;
- if (shift_amt > 0) {
- // Take care of the additional shift, if present:
- SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
- unsigned Opc = SPU::ROTMAIr32_i32;
-
- if (Op0.getOpcode() == ISD::SRL)
- Opc = SPU::ROTMr32;
-
- hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
- shift);
- }
-
- return hi32;
- }
- }
- }
- } else if (Opc == ISD::SHL) {
- if (OpVT == MVT::i64)
- return SelectSHLi64(N, OpVT);
- } else if (Opc == ISD::SRL) {
- if (OpVT == MVT::i64)
- return SelectSRLi64(N, OpVT);
- } else if (Opc == ISD::SRA) {
- if (OpVT == MVT::i64)
- return SelectSRAi64(N, OpVT);
- } else if (Opc == ISD::FNEG
- && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
- DebugLoc dl = N->getDebugLoc();
- // Check if the pattern is a special form of DFNMS:
- // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
- SDValue Op0 = N->getOperand(0);
- if (Op0.getOpcode() == ISD::FSUB) {
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == ISD::FMUL) {
- unsigned Opc = SPU::DFNMSf64;
- if (OpVT == MVT::v2f64)
- Opc = SPU::DFNMSv2f64;
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- Op00.getOperand(0),
- Op00.getOperand(1),
- Op0.getOperand(1));
- }
- }
-
- SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
- SDNode *signMask = 0;
- unsigned Opc = SPU::XORfneg64;
-
- if (OpVT == MVT::f64) {
- signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
- } else if (OpVT == MVT::v2f64) {
- Opc = SPU::XORfnegvec;
- signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
- MVT::v2i64,
- negConst, negConst).getNode());
- }
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (Opc == ISD::FABS) {
- if (OpVT == MVT::f64) {
- SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
- return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (OpVT == MVT::v2f64) {
- SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
- SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
- absConst, absConst);
- SDNode *signMask = emitBuildVector(absVec.getNode());
- return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- }
- } else if (Opc == SPUISD::LDRESULT) {
- // Custom select instructions for LDRESULT
- EVT VT = N->getValueType(0);
- SDValue Arg = N->getOperand(0);
- SDValue Chain = N->getOperand(1);
- SDNode *Result;
-
- Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
- MVT::Other, Arg,
- getRC( VT.getSimpleVT()), Chain);
- return Result;
-
- } else if (Opc == SPUISD::IndirectAddr) {
- // Look at the operands: SelectCode() will catch the cases that aren't
- // specifically handled here.
- //
- // SPUInstrInfo catches the following patterns:
- // (SPUindirect (SPUhi ...), (SPUlo ...))
- // (SPUindirect $sp, imm)
- EVT VT = N->getValueType(0);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- RegisterSDNode *RN;
-
- if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
- || (Op0.getOpcode() == ISD::Register
- && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
- && RN->getReg() != SPU::R1))) {
- NewOpc = SPU::Ar32;
- Ops[1] = Op1;
- if (Op1.getOpcode() == ISD::Constant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
- if (isInt<10>(CN->getSExtValue())) {
- NewOpc = SPU::AIr32;
- Ops[1] = Op1;
- } else {
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
- N->getValueType(0),
- Op1),
- 0);
- }
- }
- Ops[0] = Op0;
- n_ops = 2;
- }
- }
-
- if (n_ops > 0) {
- if (N->hasOneUse())
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
- else
- return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
- } else
- return SelectCode(N);
-}
-
-/*!
- * Emit the instruction sequence for i64 left shifts. The basic algorithm
- * is to fill the bottom two word slots with zeros so that zeros are shifted
- * in as the entire quadword is shifted left.
- *
- * \note This code could also be used to implement v2i64 shl.
- *
- * @param Op The shl operand
- * @param OpVT Op's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
- SDValue SelMaskVal;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
- SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
- SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
- ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0, OpVT));
- VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(ZeroFill, 0),
- SDValue(VecOp0, 0),
- SDValue(SelMask, 0));
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
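// [Editor's note: illustrative sketch, not part of the original tree.] The
// constant-shift path above splits the amount into a byte shift (SHLQBYIv2i64)
// plus a residual bit shift (SHLQBIIv2i64), since the quadword shifters work
// in those two granules:
static void splitShiftAmount(unsigned Amt, unsigned &Bytes, unsigned &Bits) {
  Bytes = Amt >> 3;                    // whole bytes to shift
  Bits = Amt & 7;                      // leftover bits, 0-7
  // Invariant: Amt == Bytes * 8 + Bits, e.g. 37 -> 4 bytes + 5 bits.
}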
-
-/*!
- * Emit the instruction sequence for i64 logical right shifts.
- *
- * @param Op The srl operand
- * @param OpVT Op's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *Shift = 0;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
-
- // Ensure that the shift amounts are negated!
- Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bytes, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bits, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- * Emit the instruction sequence for i64 arithmetic right shifts.
- *
- * @param Op The sra operand
- * @param OpVT Op's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
- // Promote Op0 to vector
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- SDNode *VecOp0 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- VecVT, N->getOperand(0), getRC(MVT::v2i64));
-
- SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
- SDNode *SignRot =
- CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
- SDValue(VecOp0, 0), SignRotAmt);
- SDNode *UpperHalfSign =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
-
- SDNode *UpperHalfSignMask =
- CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
- SDNode *UpperLowerMask =
- CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
- SDNode *UpperLowerSelect =
- CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(UpperHalfSignMask, 0),
- SDValue(VecOp0, 0),
- SDValue(UpperLowerMask, 0));
-
- SDNode *Shift = 0;
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- bytes = 31 - bytes;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
- SDValue(UpperLowerSelect, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- bits = 8 - bits;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *NegShift =
- CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
- SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(NegShift, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
-  Do the magic necessary to load an i64 constant
- */
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
- DebugLoc dl) {
- ConstantSDNode *CN = cast<ConstantSDNode>(N);
- return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
-}
-
-SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
- DebugLoc dl) {
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
- SDValue i64vec =
- SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
-
- // Here's where it gets interesting, because we have to parse out the
- // subtree handed back in i64vec:
-
- if (i64vec.getOpcode() == ISD::BITCAST) {
-    // The degenerate case where the upper and lower 32-bit halves of the
-    // splat are identical:
- SDValue Op0 = i64vec.getOperand(0);
-
- ReplaceUses(i64vec, Op0);
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(Op0.getNode()), 0),
- getRC(MVT::i64));
- } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
- SDValue lhs = i64vec.getOperand(0);
- SDValue rhs = i64vec.getOperand(1);
- SDValue shufmask = i64vec.getOperand(2);
-
- if (lhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(lhs, lhs.getOperand(0));
- lhs = lhs.getOperand(0);
- }
-
- SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
- ? lhs.getNode()
- : emitBuildVector(lhs.getNode()));
-
- if (rhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(rhs, rhs.getOperand(0));
- rhs = rhs.getOperand(0);
- }
-
- SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
- ? rhs.getNode()
- : emitBuildVector(rhs.getNode()));
-
- if (shufmask.getOpcode() == ISD::BITCAST) {
- ReplaceUses(shufmask, shufmask.getOperand(0));
- shufmask = shufmask.getOperand(0);
- }
-
- SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
- ? shufmask.getNode()
- : emitBuildVector(shufmask.getNode()));
-
- SDValue shufNode =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0));
- HandleSDNode Dummy(shufNode);
- SDNode *SN = SelectCode(Dummy.getValue().getNode());
- if (SN == 0) SN = Dummy.getValue().getNode();
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(SN, 0), getRC(MVT::i64));
- } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(i64vec.getNode()), 0),
- getRC(MVT::i64));
- } else {
- report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
- "condition");
- }
-}
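// [Editor's note: illustrative sketch, not part of the original tree.] The
// BITCAST "degenerate case" above fires when the two 32-bit halves of the
// constant are equal, so the v2i64 splat collapses to a v4i32 splat. The
// underlying test is just:
#include <cstdint>
static bool isDegenerateI64Splat(uint64_t V) {
  return uint32_t(V >> 32) == uint32_t(V);   // upper half == lower half
}
// e.g. 0x1234567812345678 -> true; 0x0000000012345678 -> false.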
-
-/// createSPUISelDag - This pass converts a legalized DAG into a
-/// SPU-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
- return new SPUDAGToDAGISel(TM);
-}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
deleted file mode 100644
index 4e9fcd1bc765..000000000000
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ /dev/null
@@ -1,3266 +0,0 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUISelLowering.h"
-#include "SPUTargetMachine.h"
-#include "SPUFrameLowering.h"
-#include "SPUMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- // Byte offset of the preferred slot (counted from the MSB)
- int prefslotOffset(EVT VT) {
- int retval=0;
- if (VT==MVT::i1) retval=3;
- if (VT==MVT::i8) retval=3;
- if (VT==MVT::i16) retval=2;
-
- return retval;
- }
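-
- // Illustrative mapping for the 16-byte quadword: i32/f32 values occupy
- // bytes 0-3 (offset 0), i16 sits in bytes 2-3 (offset 2), and i8/i1 in
- // byte 3 (offset 3) of the preferred slot.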
-
- //! Expand a library call into an actual call DAG node
- /*!
- \note
- This code is taken from SelectionDAGLegalize, since it is not exposed as
- part of the LLVM SelectionDAG API.
- */
-
- SDValue
- ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op.getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op.getOperand(i);
- Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- // Splice the libcall in wherever FindInputOutputChains tells us to.
- Type *RetTy =
- Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
- false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc());
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
- }
-}
-
-SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()),
- SPUTM(TM) {
-
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
- // Set RTLIB libcall names as used by SPU:
- setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
-
- // Set up the SPU's register classes:
- addRegisterClass(MVT::i8, &SPU::R8CRegClass);
- addRegisterClass(MVT::i16, &SPU::R16CRegClass);
- addRegisterClass(MVT::i32, &SPU::R32CRegClass);
- addRegisterClass(MVT::i64, &SPU::R64CRegClass);
- addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
- addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
- addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
-
- // SPU has no sign- or zero-extended loads for i1, i8 or i16:
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
- setTruncStoreAction(MVT::i128, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i16, Expand);
- setTruncStoreAction(MVT::i128, MVT::i8, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // SPU constant load actions are custom lowered:
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
- // SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- // Expand the jumptable branches
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- // SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-
- // SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, MVT::i8, Expand);
- setOperationAction(ISD::UREM, MVT::i8, Expand);
- setOperationAction(ISD::SDIV, MVT::i8, Expand);
- setOperationAction(ISD::UDIV, MVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
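-
- // Marking these Expand means legalization lowers them to the generic
- // libgcc/compiler-rt routines, e.g. an i32 SDIV becomes a __divsi3 call.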
-
- // We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
-
- // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
- // for f32!)
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- // SPU can do rotate right and left, so legalize them... but custom lower
- // for i8 because no i8 rotate instructions exist.
-
- // FIXME: Change from "expand" to appropriate type once ROTR is supported in
- // .td files.
- setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
-
- setOperationAction(ISD::ROTL, MVT::i32, Legal);
- setOperationAction(ISD::ROTL, MVT::i16, Legal);
- setOperationAction(ISD::ROTL, MVT::i8, Custom);
-
- // SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i8, Custom);
-
- // Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, MVT::i64, Legal);
- setOperationAction(ISD::SRL, MVT::i64, Legal);
- setOperationAction(ISD::SRA, MVT::i64, Legal);
-
- // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
-
- // Expand double-width multiplication
- // FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
-
- // Need to custom handle (some) common i8, i64 math ops
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Legal);
-
- // SPU does not have BSWAP, but it does support CTLZ for i32.
- // CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Custom);
- setOperationAction(ISD::CTPOP, MVT::i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , MVT::i8, Expand);
- setOperationAction(ISD::CTTZ , MVT::i16, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i128, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , MVT::i8, Promote);
- setOperationAction(ISD::CTLZ , MVT::i16, Promote);
- setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i128, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
-
- // SPU has a version of select that implements (a&~c)|(b&c), just like
- // select ought to work:
- setOperationAction(ISD::SELECT, MVT::i8, Legal);
- setOperationAction(ISD::SELECT, MVT::i16, Legal);
- setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Legal);
-
- setOperationAction(ISD::SETCC, MVT::i8, Legal);
- setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
- // Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
-
- // Custom lower i32/i64 -> i128 sign extend
- setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- // SPU has a legal FP -> signed INT instruction for f32, but f64 needs
- // to be expanded to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
-
- // f64 FDIV on SPU has to be expanded to a libcall:
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
-
- // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::BITCAST, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::f32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i64, Legal);
- setOperationAction(ISD::BITCAST, MVT::f64, Legal);
-
- // We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool nodes into the
- // appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
- }
-
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
-
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
-
- // Cell SPU has instructions for converting between i64 and fp.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
-
- // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // First set operation action for all vector types to expand. Then we
- // will selectively turn on ones that can be effectively codegen'd.
- addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
-
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
- // Set operation actions to legal types only.
- if (!isTypeLegal(VT)) continue;
-
- // add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD, VT, Legal);
- setOperationAction(ISD::SUB, VT, Legal);
- // mul is legal for the supported vector types.
- setOperationAction(ISD::MUL, VT, Legal);
-
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Custom);
-
- // These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
-
- // Expand all trunc stores
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
- setTruncStoreAction(VT, TargetVT, Expand);
- }
-
- // Custom lower build_vector, constant pool spills, insert and
- // extract vector elements:
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- }
-
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-
- setOperationAction(ISD::AND, MVT::v16i8, Custom);
- setOperationAction(ISD::OR, MVT::v16i8, Custom);
- setOperationAction(ISD::XOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
-
- setStackPointerRegisterToSaveRestore(SPU::R1);
-
- // We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
-
- setMinFunctionAlignment(3);
-
- computeRegisterProperties();
-
- // Set the pre-RA register scheduler default to register-pressure reduction,
- // which produces slightly better code than the default (could also be TDRR,
- // but TargetLowering.h needs a mod to support that model):
- setSchedulingPreference(Sched::RegPressure);
-}
-
-const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
- case SPUISD::Hi: return "SPUISD::Hi";
- case SPUISD::Lo: return "SPUISD::Lo";
- case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
- case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
- case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
- case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
- case SPUISD::CALL: return "SPUISD::CALL";
- case SPUISD::SHUFB: return "SPUISD::SHUFB";
- case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
- case SPUISD::CNTB: return "SPUISD::CNTB";
- case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
- case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
- case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
- case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
- case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
- case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
- case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
- case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
- case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
- case SPUISD::SELB: return "SPUISD::SELB";
- case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
- case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
- case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Return the Cell SPU's SETCC result type
-//===----------------------------------------------------------------------===//
-
-EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i8, i16 and i32 are valid SETCC result types
- MVT::SimpleValueType retval;
-
- switch(VT.getSimpleVT().SimpleTy){
- case MVT::i1:
- case MVT::i8:
- retval = MVT::i8; break;
- case MVT::i16:
- retval = MVT::i16; break;
- case MVT::i32:
- default:
- retval = MVT::i32;
- }
- return retval;
-}
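-
-// For example, a SETCC of two i16 operands yields an i16 result holding 0 or
-// -1 (per the ZeroOrNegativeOneBooleanContent set up in the constructor).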
-
-//===----------------------------------------------------------------------===//
-// Calling convention code:
-//===----------------------------------------------------------------------===//
-
-#include "SPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// LowerOperation implementation
-//===----------------------------------------------------------------------===//
-
-/// Custom lower loads for CellSPU
-/*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
-
- For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for MVT::f32 extending load to MVT::f64:
-
-\verbatim
-%1 v16i8,ch = load
-%2 v16i8,ch = rotate %1
-%3 v4f32,ch = bitconvert %2
-%4 f32 = vec2prefslot %3
-%5 f64 = fp_extend %4
-\endverbatim
-*/
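-// Worked example (illustrative): an aligned i32 load from byte offset 4 of a
-// quadword becomes roughly
-//   %q   = load <16 x i8>            ; fetch the whole 16-byte block
-//   %r   = rotbytes_left %q, 4       ; rotate the word into bytes 0-3
-//   %v   = bitcast %r to v4i32
-//   %val = vec2prefslot %v           ; extract the preferred slot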
-static SDValue
-LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDValue the_chain = LN->getChain();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- EVT InVT = LN->getMemoryVT();
- EVT OutVT = Op.getValueType();
- ISD::LoadExtType ExtType = LN->getExtensionType();
- unsigned alignment = LN->getAlignment();
- int pso = prefslotOffset(InVT);
- DebugLoc dl = Op.getDebugLoc();
- EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
- (128 / InVT.getSizeInBits()));
-
- // two sanity checks
- assert( LN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = LN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
-
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - pso);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(-pso, PtrVT));
- }
-
- // Do the load as an i128 to allow possible shifting
- SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false, 16);
-
- // When the size is not greater than the alignment, we get all the data
- // with just one load
- if (alignment >= InVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- // Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
- low.getValue(0), rotate);
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- InVT, (128 / InVT.getSizeInBits()));
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BITCAST, dl, vecVT, result));
- }
- // When alignment is less than the size, we might need (known only at
- // run-time) two loads
- // TODO: if the memory address is composed only from constants, we have
- // extra knowledge, and might avoid the second load
- else {
- // storage position offset from the lower 16-byte-aligned memory chunk
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
- // get a register full of ones. (this implementation is a workaround: LLVM
- // cannot handle 128-bit signed int constants)
- SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(16, PtrVT)),
- highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false,
- 16);
-
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- high.getValue(1));
-
- // Shift the (possible) high part right to compensate for the misalignment.
- // If there is no high part (i.e. the value is i64 and the offset is 4),
- // this will zero out the high value.
- high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
- DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset
- ));
-
- // Shift the low part similarly.
- low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
-
- // Merge the two parts
- result = DAG.getNode(ISD::BITCAST, dl, vecVT,
- DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
-
- if (!InVT.isVector()) {
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
- }
-
- }
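-  // Illustrative: for an i64 load at offset 12 (align 4), offset == 12; the
-  // low quad is shifted left 12 bytes, the high quad right 16-12 == 4 bytes,
-  // and the OR reassembles the straddling value in bytes 0-7.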
- // Handle extending loads by extending the scalar result:
- if (ExtType == ISD::SEXTLOAD) {
- result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::ZEXTLOAD) {
- result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::EXTLOAD) {
- unsigned NewOpc = ISD::ANY_EXTEND;
-
- if (OutVT.isFloatingPoint())
- NewOpc = ISD::FP_EXTEND;
-
- result = DAG.getNode(NewOpc, dl, OutVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
- SDValue retops[2] = {
- result,
- the_chain
- };
-
- result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
- retops, sizeof(retops) / sizeof(retops[0]));
- return result;
-}
-
-/// Custom lower stores for CellSPU
-/*!
- All CellSPU stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to generate a shuffle to insert the
- requested element into its place, then store the resulting block.
- */
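-// Worked example (illustrative): storing an i32 at byte offset 8 of an
-// aligned quadword becomes roughly
-//   %old = load <16 x i8>                          ; containing block
-//   %m   = shuffle_mask for bytes 8-11             ; insertion mask
-//   %new = shufb (scalar_to_vector %val), %old, %m
-//   store <16 x i8> %new                           ; write back whole block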
-static SDValue
-LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- StoreSDNode *SN = cast<StoreSDNode>(Op);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
- EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- unsigned alignment = SN->getAlignment();
- SDValue result;
- EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
- (128 / StVT.getSizeInBits()));
- // Get pointer infos to the memory chunk(s) that contain the data to store
- uint64_t mpi_offset = SN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
-
-
- // two sanity checks
- assert( SN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Load the lower part of the memory to which to store.
- SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
-
- // If we don't need to store across the 16-byte boundary, one store suffices
- if (alignment >= StVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- LoadSDNode *LN = cast<LoadSDNode>(low);
- SDValue theValue = SN->getValue();
-
- if (StVT != VT
- && (theValue.getOpcode() == ISD::AssertZext
- || theValue.getOpcode() == ISD::AssertSext)) {
- // Drill down and get the value for zero- and sign-extended
- // quantities
- theValue = theValue.getOperand(0);
- }
-
- // If the base pointer is already a D-form address, then just create
- // a new D-form address with a slot offset and the original base pointer.
- // Otherwise generate a D-form address with the slot offset relative
- // to the stack pointer, which is always aligned.
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "CellSPU LowerSTORE: basePtr = ";
- basePtr.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
- insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
- theValue);
-
- result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, low,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, insertEltOp));
-
- result = DAG.getStore(the_chain, dl, result, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(),
- 16);
-
- }
- // Do the store when it might cross the 16-byte memory access boundary.
- else {
- // TODO issue a warning if SN->isVolatile()== true? This is likely not
- // what the user wanted.
-
- // address offset from the nearest lower 16-byte-aligned address
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- SN->getBasePtr(),
- DAG.getConstant(0xf, MVT::i32));
- // 16 - offset
- SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset);
- // 16 - sizeof(Value)
- SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- DAG.getConstant( VT.getSizeInBits()/8,
- MVT::i32));
- // get a register full of ones
- SDValue ones = DAG.getConstant(-1, MVT::v4i32);
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- // Create the 128-bit masks that have ones where the data to store is
- // located.
- SDValue lowmask, himask;
- // If the value to store doesn't fill an entire 128 bits, zero out the
- // trailing bits of the mask so that only the value we want to store
- // is masked in.
- // This happens e.g. in the case of a store of i32 with align 2.
- if (!VT.isVector()){
- Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
- lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- surplus);
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
-
- }
- else {
- lowmask = ones;
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- }
- // This will be zero if no data goes to the high quad.
- himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- offset_compl);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
- offset);
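-    // Illustrative: for an i32 store at offset 14, lowmask keeps bytes 14-15
-    // (the first half of the value) and himask keeps bytes 0-1 of the high
-    // quad (the second half that straddles the boundary).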
-
- // Load in the old data and zero out the parts that will be overwritten with
- // the new data to store.
- SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- hi.getValue(1));
-
- low = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
- hi = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
-
- // Shift the Value to store into place. rlow contains the parts that go to
- // the lower memory chunk, rhi has the parts that go to the upper one.
- SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
- rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
- SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
- offset_compl);
-
- // Merge the old data and the new data and store the results
- // Need to convert vectors to integer here, as 'OR'ing floats asserts
- rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
- rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
-
- low = DAG.getStore(the_chain, dl, rlow, basePtr,
- lowMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- hi = DAG.getStore(the_chain, dl, rhi,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
- hi.getValue(0));
- }
-
- return result;
-}
-
-//! Generate the address of a constant pool entry.
-static SDValue
-LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Just return the SDValue with the constant pool address in it.
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerConstantPool: Relocation model other than static"
- " not supported.");
-}
-
-//! Alternate entry point for generating the address of a constant pool entry
-SDValue
-SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
- return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
-}
-
-static SDValue
-LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerJumpTable: Relocation model other than static"
- " not supported.");
-}
-
-static SDValue
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
- PtrVT, GSDN->getOffset());
- const TargetMachine &TM = DAG.getTarget();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- } else {
- report_fatal_error("LowerGlobalAddress: Relocation model other than static"
- " not supported.");
- /*NOTREACHED*/
- }
-}
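-
-// Address forms under the static relocation model (illustrative summary):
-// the small memory model uses a single A-form absolute address, while the
-// large memory model materializes the address as an ILHU/IOHL Hi/Lo pair
-// combined through SPUISD::IndirectAddr.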
-
-//! Custom lower double precision floating point constants
-static SDValue
-LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (VT == MVT::f64) {
- ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
- assert((FP != 0) &&
- "LowerConstantFP: Node is not ConstantFPSDNode");
-
- uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- SDValue T = DAG.getConstant(dbits, MVT::i64);
- SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
- }
-
- return SDValue();
-}
-
-SDValue
-SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
-
- unsigned ArgOffset = SPUFrameLowering::minStackSize();
- unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
-
- // Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- SDValue ArgVal;
- CCValAssign &VA = ArgLocs[ArgNo];
-
- if (VA.isRegLoc()) {
- const TargetRegisterClass *ArgRegClass;
-
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerFormalArguments Unhandled argument type: " +
- Twine(ObjectVT.getEVTString()));
- case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
- case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
- case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
- case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
- case MVT::i128:
- ArgRegClass = &SPU::GPRCRegClass;
- break;
- case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
- case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
- }
-
- unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++ArgRegIdx;
- } else {
- // We need to load the argument to a virtual register if we determined
- // above that we ran out of physical registers of the appropriate type
- // or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, false, 0);
- ArgOffset += StackSlotSize;
- }
-
- InVals.push_back(ArgVal);
- // Update the chain
- Chain = ArgVal.getOperand(0);
- }
-
- // vararg handling:
- if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
- static const uint16_t ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
- };
- // size of ArgRegs array
- const unsigned NumArgRegs = 77;
-
- // We will spill (79-3)+1 registers to the stack
- SmallVector<SDValue, 79-3+1> MemOps;
-
- // Create the frame slot
- for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
- SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
- false, false, 0);
- Chain = Store.getOperand(0);
- MemOps.push_back(Store);
-
- // Increment address by stack slot size for the next stored argument
- ArgOffset += StackSlotSize;
- }
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- }
-
- return Chain;
-}
-
-/// isLSAAddress - Return the immediate to use if the specified
-/// value is representable as an LSA address.
-static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
-
- int Addr = C->getZExtValue();
- if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 14 >> 14) != Addr)
- return 0; // Top 14 bits have to be sext of immediate.
-
- return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
-}
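-
-// e.g. an absolute callee address of 0x3FFC (word aligned, within the signed
-// 18-bit window) is representable and gets encoded as 0x3FFC >> 2 == 0xFFF.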
-
-SDValue
-SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
-
- // CellSPU target does not yet support tail call optimization.
- isTailCall = false;
-
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
- const unsigned NumArgRegs = ArgLocs.size();
-
-
- // Handy pointer type
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory.
- unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
- unsigned ArgRegIdx = 0;
-
- // Keep track of registers passing arguments
- std::vector<std::pair<unsigned, SDValue> > RegsToPass;
- // And the arguments passed on the stack
- SmallVector<SDValue, 8> MemOpChains;
-
- for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
- SDValue Arg = OutVals[ArgRegIdx];
- CCValAssign &VA = ArgLocs[ArgRegIdx];
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2i64:
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- }
- }
-
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP].
- unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
-
- // Insert a call sequence start
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
- true));
-
- if (!MemOpChains.empty()) {
- // Adjust the stack pointer for the stack arguments.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = SPUISD::CALL;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
-
- if (!ST->usingLargeMem()) {
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls, otherwise, external symbols are BRASL calls. This assumes
- // that declared/defined symbols are in the same compilation unit and can
- // be reached through PC-relative jumps.
- //
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
- }
- } else {
- // "Large memory" mode: Turn all calls into indirect calls with a X-form
- // address pairs:
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
- }
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
- Callee.getValueType());
-
- if (!ST->usingLargeMem()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
- }
- } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
- // If this is an absolute destination address that appears to be a legal
- // local store address, use the munged value.
- Callee = SDValue(Dest, 0);
- }
-
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
- // Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
- &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
-
- // If the function returns void, just return the chain.
- if (Ins.empty())
- return Chain;
-
- // Now handle the return value(s)
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
-
-
- // If the call has results, copy the values out of the ret val registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign VA = RVLocs[i];
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-SDValue
-SPUTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode())
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Vector related lowering:
-//===----------------------------------------------------------------------===//
-
-static ConstantSDNode *
-getVecImm(SDNode *N) {
- SDValue OpVal(0, 0);
-
- // Check to see if this buildvec has a single non-undef value in its elements.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- else if (OpVal != N->getOperand(i))
- return 0;
- }
-
- if (OpVal.getNode() != 0) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
- return CN;
- }
- }
-
- return 0;
-}
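-
-// e.g. build_vector <42, undef, 42, 42> yields the ConstantSDNode for 42,
-// while build_vector <1, 2, 3, 4> (no common value) yields null.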
-
-/// get_vec_u18imm - Test if this vector is a vector filled with the same value
-/// and the value fits into an unsigned 18-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (Value <= 0x3ffff)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
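-
-// For an i64 splat both 32-bit halves must match: 0x0001234500012345 passes
-// (upper == lower) and 0x12345 <= 0x3ffff, so it fits a u18 immediate.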
-
-/// get_vec_i16imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
- return DAG.getTargetConstant(Value, ValueType);
- }
- }
-
- return SDValue();
-}
-
-/// get_vec_i10imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 10-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (isInt<10>(Value))
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_vec_i8imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 8-bit constant, and if so, return the
-/// constant.
-///
-/// @note: The incoming vector is v16i8 because that's the only way we can load
-/// constant vectors. Thus, we test to see if the upper and lower bytes are the
-/// same value.
-SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int Value = (int) CN->getZExtValue();
- if (ValueType == MVT::i16
- && Value <= 0xffff /* truncated from uint64_t */
- && ((short) Value >> 8) == ((short) Value & 0xff))
- return DAG.getTargetConstant(Value & 0xff, ValueType);
- else if (ValueType == MVT::i8
- && (Value & 0xff) == Value)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
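-
-// For illustration (hypothetical value): with ValueType == MVT::i16 and a
-// splat of 0x5A5A, the upper and lower bytes agree ((0x5A5A >> 8) == 0x5A ==
-// (0x5A5A & 0xff)), so get_vec_i8imm returns the byte constant 0x5A.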
-
-/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if ((ValueType == MVT::i32
- && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
- || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
- return DAG.getTargetConstant(Value >> 16, ValueType);
- }
-
- return SDValue();
-}
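-
-// For illustration (hypothetical value): a v4i32 splat of 0x00120000 has
-// only upper-halfword bits set, so get_ILHUvec_imm returns 0x0012 -- the
-// immediate that ILHU (immediate load halfword upper) shifts into the upper
-// 16 bits of each word element.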
-
-/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
-SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
- }
-
- return SDValue();
-}
-
-/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
-SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
- }
-
- return SDValue();
-}
-
-//! Lower a BUILD_VECTOR instruction creatively:
-static SDValue
-LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
- DebugLoc dl = Op.getDebugLoc();
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
- unsigned minSplatBits = EltVT.getSizeInBits();
-
- if (minSplatBits < 16)
- minSplatBits = 16;
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
-
- if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- || minSplatBits < SplatBitSize)
- return SDValue(); // Not a constant splat, or the splat was wider than requested
-
- uint64_t SplatBits = APSplatBits.getZExtValue();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
- Twine(VT.getEVTString()));
- /*NOTREACHED*/
- case MVT::v4f32: {
- uint32_t Value32 = uint32_t(SplatBits);
- assert(SplatBitSize == 32
- && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
- }
- case MVT::v2f64: {
- uint64_t f64val = uint64_t(SplatBits);
- assert(SplatBitSize == 64
- && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
- }
- case MVT::v16i8: {
- // 8-bit constants have to be expanded to 16-bits
- unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
- }
- case MVT::v8i16: {
- unsigned short Value16 = SplatBits;
- SDValue T = DAG.getConstant(Value16, EltVT);
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, T);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
- }
- case MVT::v4i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
- }
- case MVT::v2i64: {
- return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
- }
- }
-}
-
-/*!
- */
-SDValue
-SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
- DebugLoc dl) {
- uint32_t upper = uint32_t(SplatVal >> 32);
- uint32_t lower = uint32_t(SplatVal);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et al.
- SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Val, Val, Val, Val));
- } else {
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
-
- // Both upper and lower are special, lower to a constant pool load:
- if (lower_special && upper_special) {
- SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
- SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- UpperVal, LowerVal, UpperVal, LowerVal);
- return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
- }
-
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
-
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
-
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
-
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
-
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
- }
-
- return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
- }
-}
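-
-// A decoding note for the magic bytes above (summarizing shufb's
-// control-byte encoding from the SPU ISA; worth re-checking against the
-// spec): a control byte of the form 10xxxxxx yields 0x00, 110xxxxx yields
-// 0xff, and 111xxxxx yields 0x80; any other value selects that byte index
-// from the concatenated inputs. Hence 0x80 fills a zero byte, 0xc0 an
-// all-ones byte, and 0xe0 the leading 0x80 byte needed for the 0x80000000
-// pattern.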
-
-/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
-/// which the Cell can operate. The code inspects V3 to ascertain whether the
-/// permutation vector, V3, is monotonically increasing with one "exception"
-/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
-/// In either case, the net result is going to eventually invoke SHUFB to
-/// permute/shuffle the bytes from V1 and V2.
-/// \note
-/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
-/// generate the control word for byte/halfword/word insertion. This takes care
-/// of a single element move from V2 into V1.
-/// \note
-/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-
- // If we have a single element being moved from V1 to V2, this can be handled
- // using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element, and the source
- // slot of the element to move must be the same as the destination.
- EVT VecVT = V1.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- unsigned EltsFromV2 = 0;
- unsigned V2EltOffset = 0;
- unsigned V2EltIdx0 = 0;
- unsigned CurrElt = 0;
- unsigned MaxElts = VecVT.getVectorNumElements();
- unsigned PrevElt = 0;
- bool monotonic = true;
- bool rotate = true;
- int rotamt = 0;
- EVT maskVT; // which of the c?d instructions to use
-
- if (EltVT == MVT::i8) {
- V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
- } else if (EltVT == MVT::i16) {
- V2EltIdx0 = 8;
- maskVT = MVT::v8i16;
- } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
- V2EltIdx0 = 4;
- maskVT = MVT::v4i32;
- } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
- V2EltIdx0 = 2;
- maskVT = MVT::v2i64;
- } else
- llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
-
- for (unsigned i = 0; i != MaxElts; ++i) {
- if (SVN->getMaskElt(i) < 0)
- continue;
-
- unsigned SrcElt = SVN->getMaskElt(i);
-
- if (monotonic) {
- if (SrcElt >= V2EltIdx0) {
- // TODO: optimize for the monotonic case when several consecutive
- // elements are taken from V2. Do we ever get such a case?
- if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
- V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
- else
- monotonic = false;
- ++EltsFromV2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
- }
-
- ++CurrElt;
- }
-
- if (rotate) {
- if (PrevElt > 0 && SrcElt < MaxElts) {
- if ((PrevElt == SrcElt - 1)
- || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
- PrevElt = SrcElt;
- } else {
- rotate = false;
- }
- } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
- // First time or after a "wrap around"
- rotamt = SrcElt - i;
- PrevElt = SrcElt;
- } else {
- // This isn't a rotation, takes elements from vector 2
- rotate = false;
- }
- }
- }
-
- if (EltsFromV2 == 1 && monotonic) {
- // Compute mask and shuffle
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
- // R1 ($sp) is used here only because its low bits are guaranteed to be zero
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
- maskVT, Pointer);
-
- // Use shuffle mask in SHUFB synthetic instruction:
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
- ShufMaskOp);
- } else if (rotate) {
- if (rotamt < 0)
- rotamt += MaxElts;
- rotamt *= EltVT.getSizeInBits()/8;
- return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
- V1, DAG.getConstant(rotamt, MVT::i16));
- } else {
- // Convert the SHUFFLE_VECTOR mask's input element units to the
- // actual bytes.
- unsigned BytesPerElement = EltVT.getSizeInBits()/8;
-
- SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = MaxElts; i != e; ++i) {
- unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
-
- for (unsigned j = 0; j < BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, MVT::i8));
- }
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
- }
-}
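-
-// Two worked examples (hypothetical masks): for a v4i32 shuffle with mask
-// (0, 1, 6, 3), element 2 is the single element taken from V2 -- slot
-// 6 - 4 == 2 matches its destination -- so the code above emits
-// SHUFFLE_MASK/SHUFB with V2EltOffset == 8. For mask (1, 2, 3, 0), the
-// indices increase by one with a single wrap-around, so it emits
-// ROTBYTES_LEFT with rotamt == 1, i.e. a 4-byte rotate.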
-
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
- DebugLoc dl = Op.getDebugLoc();
-
- if (Op0.getNode()->getOpcode() == ISD::Constant) {
- // For a constant, build the appropriate constant vector, which will
- // eventually simplify to a vector register load.
-
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
- SmallVector<SDValue, 16> ConstVecValues;
- EVT VT;
- size_t n_copies;
-
- // Create a constant vector:
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
- case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
- case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
- case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
- case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
- case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
- case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
- }
-
- SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
- for (size_t j = 0; j < n_copies; ++j)
- ConstVecValues.push_back(CValue);
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
- &ConstVecValues[0], ConstVecValues.size());
- } else {
- // Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
- }
- }
-}
-
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- SDValue N = Op.getOperand(0);
- SDValue Elt = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- SDValue retval;
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- // Constant argument:
- int EltNo = (int) C->getZExtValue();
-
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == MVT::i64 && EltNo >= 2)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
-
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
- }
-
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
-
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
-
- unsigned int ShufBytes[16] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
- for (int i = 0; i < 16; ++i) {
- // zero-fill the upper part of the preferred slot; don't care about the
- // other slots:
- unsigned int mask_val;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
-
- ShufBytes[i] = mask_val;
- } else
- ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
- }
-
- SDValue ShufMask[4];
- for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
- unsigned bidx = i * 4;
- unsigned int bits = ((ShufBytes[bidx] << 24) |
- (ShufBytes[bidx+1] << 16) |
- (ShufBytes[bidx+2] << 8) |
- ShufBytes[bidx+3]);
- ShufMask[i] = DAG.getConstant(bits, MVT::i32);
- }
-
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
- N, N, ShufMaskVec));
- } else {
- // Variable index: Rotate the requested element into slot 0, then replicate
- // slot 0 across the vector
- EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector()) {
- report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
- "vector type!");
- }
-
- // Make life easier by making sure the index is zero-extended to i32
- if (Elt.getValueType() != MVT::i32)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
-
- // Scale the index to a bit/byte shift quantity
- APInt scaleFactor =
- APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
- unsigned scaleShift = scaleFactor.logBase2();
- SDValue vecShift;
-
- if (scaleShift > 0) {
- // Scale the shift factor:
- Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
- DAG.getConstant(scaleShift, MVT::i32));
- }
-
- vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
-
- // Replicate the bytes starting at byte 0 across the entire vector (for
- // consistency with the notion of a unified register set)
- SDValue replicate;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
- "type");
- /*NOTREACHED*/
- case MVT::i8: {
- SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i16: {
- SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
- SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- loFactor, hiFactor, loFactor, hiFactor);
- break;
- }
- }
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- vecShift, vecShift, replicate));
- }
-
- return retval;
-}
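-
-// A note on the "preferred slot" used above: SPU keeps scalars in a fixed
-// byte range of the 128-bit register -- byte 3 for i8, bytes 2-3 for i16,
-// bytes 0-3 for i32/f32, bytes 0-7 for i64/f64 -- which is exactly what the
-// prefslot_begin/prefslot_end table encodes.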
-
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- SDValue VecOp = Op.getOperand(0);
- SDValue ValOp = Op.getOperand(1);
- SDValue IdxOp = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT eltVT = ValOp.getValueType();
-
- // use 0 when the lane to insert to is 'undef'
- int64_t Offset=0;
- if (IdxOp.getOpcode() != ISD::UNDEF) {
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
- }
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $sp ($1) because it's always 16-byte aligned and it's available:
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Offset, PtrVT));
- // widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
- 128/ VT.getVectorElementType().getSizeInBits());
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
-
- SDValue result =
- DAG.getNode(SPUISD::SHUFB, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
- VecOp,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
-
- return result;
-}
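-
-// The $sp trick, spelled out: the C*D instructions derive an insertion mask
-// from the low four bits of their address operand. Since $sp is always
-// 16-byte aligned, ($sp + Offset) carries exactly Offset in those bits,
-// steering the generated mask to the requested element slot.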
-
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
- const TargetLowering &TLI)
-{
- SDValue N0 = Op.getOperand(0); // Everything has at least one operand
- DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
-
- assert(Op.getValueType() == MVT::i8);
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled i8 math operator");
- case ISD::ADD: {
- // 8-bit addition: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
-
- }
-
- case ISD::SUB: {
- // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::ROTR:
- case ISD::ROTL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- // Replicate lower 8-bits into upper 8:
- SDValue ExpandArg =
- DAG.getNode(ISD::OR, dl, MVT::i16, N0,
- DAG.getNode(ISD::SHL, dl, MVT::i16,
- N0, DAG.getConstant(8, MVT::i32)));
-
- // Truncate back down to i8
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
- }
- case ISD::SRL:
- case ISD::SHL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::ZERO_EXTEND;
-
- if (N1.getValueType().bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
-
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::SRA: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::SIGN_EXTEND;
-
- if (N1VT.bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::MUL: {
- SDValue N1 = Op.getOperand(1);
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- }
-}
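-
-// Why ROTL/ROTR replicate the low byte (worked example, hypothetical
-// operands): rotl i8 0xAB by 4 should give 0xBA. Rotating the plain promoted
-// i16 0x00AB would pull in zero bits, but rotating the replicated 0xABAB left
-// by 4 yields 0xBABA, whose low byte truncates to the correct 0xBA.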
-
-//! Lower byte immediate operations for v16i8 vectors:
-static SDValue
-LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
- SDValue ConstVec;
- SDValue Arg;
- EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
-
- ConstVec = Op.getOperand(0);
- Arg = Op.getOperand(1);
- if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- } else {
- ConstVec = Op.getOperand(1);
- Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- }
- }
- }
-
- if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
-
- if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- && minSplatBits <= SplatBitSize) {
- uint64_t SplatBits = APSplatBits.getZExtValue();
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
-
- SmallVector<SDValue, 16> tcVec;
- tcVec.assign(16, tc);
- return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
- }
- }
-
- // These operations (AND, OR, XOR) are legal, they just couldn't be custom
- // lowered. Return the operation, rather than a null SDValue.
- return Op;
-}
-
-//! Custom lowering for CTPOP (count population)
-/*!
- Custom lowering code that counts the number of ones in the input
- operand. SPU has such an instruction, but it counts the number of
- ones per byte, which then have to be accumulated.
-*/
-static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
- }
-
- case MVT::i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i16);
- SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
- SDValue Shift1 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
-
- return DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- Tmp1, Shift1),
- Tmp1),
- Mask0);
- }
-
- case MVT::i32: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
- SDValue Shift1 = DAG.getConstant(16, MVT::i32);
- SDValue Shift2 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Comp1 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
- Shift1);
-
- SDValue Sum1 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
-
- SDValue Sum1_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
-
- SDValue Comp2 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
- Shift2);
- SDValue Sum2 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
-
- return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
- }
-
- case MVT::i64:
- break;
- }
-
- return SDValue();
-}
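-
-// Worked example for the i32 path (hypothetical input): for N == 0x01020304,
-// CNTB yields the per-byte counts 0x01010201. Adding the 16-bit-shifted copy
-// gives 0x01010302; adding the 8-bit-shifted copy of that gives 0x01020405;
-// masking with 0xff leaves 5, the number of set bits in 0x01020304.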
-
-//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
-/*!
- f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
- All conversions to i64 are expanded to a libcall.
- */
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
- || OpVT == MVT::i64) {
- // Convert f32 / f64 to i32 / i64 via libcall.
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::FP_TO_SINT)
- ? RTLIB::getFPTOSINT(Op0VT, OpVT)
- : RTLIB::getFPTOUINT(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
-/*!
- i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
- All conversions from i64 are expanded to a libcall.
- */
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
- || Op0VT == MVT::i64) {
- // Convert i32, i64 to f64 via libcall:
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::SINT_TO_FP)
- ? RTLIB::getSINTTOFP(Op0VT, OpVT)
- : RTLIB::getUINTTOFP(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SETCC
-/*!
- This handles MVT::f64 (double floating point) condition lowering
- */
-static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
- DebugLoc dl = Op.getDebugLoc();
- assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
-
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- EVT lhsVT = lhs.getValueType();
- assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
-
- EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
- APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- EVT IntVT(MVT::i64);
-
- // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
- // selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
- SDValue lhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64lhs, DAG.getConstant(32, MVT::i32)));
- SDValue lhsHi32abs =
- DAG.getNode(ISD::AND, dl, MVT::i32,
- lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue lhsLo32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
-
- // SETO and SETUO only use the lhs operand:
- if (CC->get() == ISD::SETO) {
- // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
- // SETUO
- APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, lhsVT),
- ISD::SETUO),
- DAG.getConstant(ccResultAllOnes, ccResultVT));
- } else if (CC->get() == ISD::SETUO) {
- // Evaluates to true if Op0 is [SQ]NaN
- return DAG.getNode(ISD::AND, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhsHi32abs,
- DAG.getConstant(0x7ff00000, MVT::i32),
- ISD::SETGE),
- DAG.getSetCC(dl, ccResultVT,
- lhsLo32,
- DAG.getConstant(0, MVT::i32),
- ISD::SETGT));
- }
-
- SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
- SDValue rhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64rhs, DAG.getConstant(32, MVT::i32)));
-
- // If a value is negative, subtract from the sign magnitude constant:
- SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
-
- // Convert the sign-magnitude representation into 2's complement:
- SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- lhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
- SDValue lhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- lhsSelectMask, lhsSignMag2TC, i64lhs);
-
- SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- rhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
- SDValue rhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- rhsSelectMask, rhsSignMag2TC, i64rhs);
-
- unsigned compareOp;
-
- switch (CC->get()) {
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- compareOp = ISD::SETEQ; break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- compareOp = ISD::SETGT; break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- compareOp = ISD::SETGE; break;
- case ISD::SETOLT:
- case ISD::SETULT:
- compareOp = ISD::SETLT; break;
- case ISD::SETOLE:
- case ISD::SETULE:
- compareOp = ISD::SETLE; break;
- case ISD::SETUNE:
- case ISD::SETONE:
- compareOp = ISD::SETNE; break;
- default:
- report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
- }
-
- SDValue result =
- DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
- (ISD::CondCode) compareOp);
-
- if ((CC->get() & 0x8) == 0) {
- // Ordered comparison:
- SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
- rhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
-
- result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
- }
-
- return result;
-}
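-
-// The sign-magnitude trick above, in brief: non-negative IEEE-754 doubles
-// compare like their bit patterns, while negative ones sort in reverse
-// bit-pattern order. Subtracting a negative operand's bit pattern from
-// 0x8000000000000000 converts it to two's complement, so a single signed
-// i64 compare implements the f64 relation; NaNs are screened out by the
-// separate SETO/SETUO handling and the trailing "ordered" AND.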
-
-//! Lower ISD::SELECT_CC
-/*!
- ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
- SELB instruction.
-
- \note Need to revisit this in the future: if the code path through the true
- and false value computations is longer than the latency of a branch (6
- cycles), then it would be more advantageous to branch and insert a new basic
- block and branch on the condition. However, this code does not make that
- assumption, given the simplistic uses so far.
- */
-
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = Op.getValueType();
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- SDValue trueval = Op.getOperand(2);
- SDValue falseval = Op.getOperand(3);
- SDValue condition = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- // NOTE: SELB's arguments: $rA, $rB, $mask
- //
- // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
- // where bits in $mask are 1. CCond will be inverted, having 1s where the
- // condition was true and 0s where the condition was false. Hence, the
- // arguments to SELB get reversed.
-
- // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
- // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
- // with another "cannot select select_cc" assert:
-
- SDValue compare = DAG.getNode(ISD::SETCC, dl,
- TLI.getSetCCResultType(Op.getValueType()),
- lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
-}
-
-//! Custom lower ISD::TRUNCATE
-static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
-{
- // Type to truncate to
- EVT VT = Op.getValueType();
- MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to truncate from
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
- // Create shuffle mask, least significant doubleword of quadword
- unsigned maskHigh = 0x08090a0b;
- unsigned maskLow = 0x0c0d0e0f;
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
-
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- Op0, Op0, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
- }
-
- return SDValue(); // Leave the truncate unmolested
-}
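-
-// The byte indices above work because SPU registers are big-endian: the
-// least significant doubleword of the i128 occupies bytes 8-15, which
-// maskHigh (0x08090a0b) and maskLow (0x0c0d0e0f) copy into the i64
-// preferred slot (bytes 0-7).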
-
-/*!
- * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
- * algorithm is to duplicate the sign bit using rotmai to generate at
- * least one byte full of sign bits. Then propagate the "sign-byte" into
- * the leftmost words and the i64/i32 into the rightmost words using shufb.
- *
- * @param Op The sext operand
- * @param DAG The current DAG
- * @return The SDValue with the entire instruction sequence
- */
-static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
-{
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to extend to
- MVT OpVT = Op.getValueType().getSimpleVT();
-
- // Type to extend from
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
-
- // extend i8 & i16 via i32
- if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
- Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
- Op0VT = MVT::i32;
- }
-
- // The type to extend to needs to be a i128 and
- // the type to extend from needs to be i64 or i32.
- assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
- "LowerSIGN_EXTEND: input and/or output operand have wrong size");
- (void)OpVT;
-
- // Create shuffle mask
- unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
- unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
- unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask2, MVT::i32),
- DAG.getConstant(mask3, MVT::i32));
-
- // Word wise arithmetic right shift to generate at least one byte
- // that contains sign bits.
- MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
- SDValue sraVal = DAG.getNode(ISD::SRA,
- dl,
- mvt,
- DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
- DAG.getConstant(31, MVT::i32));
-
- // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
- SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- dl, Op0VT, Op0,
- DAG.getTargetConstant(
- SPU::GPRCRegClass.getID(),
- MVT::i32)), 0);
- // Shuffle bytes - Copy the sign bits into the upper 64 bits
- // and the input value into the lower 64 bits.
- SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- extended, sraVal, shufMask);
- return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
-}
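-
-// How the masks select bytes (per shufb's operand numbering; worth
-// re-checking against the ISA): control bytes 0x00-0x0f pick from the first
-// operand (`extended`, the input value) and 0x10-0x1f from the second
-// (`sraVal`, the replicated sign bytes). So mask1 == 0x10101010 floods the
-// upper words with the sign byte, while mask2/mask3 copy the original i64
-// (or i32) bytes into the low end of the i128.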
-
-//! Custom (target-specific) lowering entry point
-/*!
- This is where LLVM's DAG selection process calls to do target-specific
- lowering of nodes.
- */
-SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- unsigned Opc = (unsigned) Op.getOpcode();
- EVT VT = Op.getValueType();
-
- switch (Opc) {
- default: {
-#ifndef NDEBUG
- errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- Op.getNode()->dump();
-#endif
- llvm_unreachable(0);
- }
- case ISD::LOAD:
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD:
- case ISD::ZEXTLOAD:
- return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::STORE:
- return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantFP:
- return LowerConstantFP(Op, DAG);
-
- // i8, i64 math ops:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::ROTR:
- case ISD::ROTL:
- case ISD::SRL:
- case ISD::SHL:
- case ISD::SRA: {
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
- }
-
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG, *this);
-
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return LowerINT_TO_FP(Op, DAG, *this);
-
- // Vector-related lowering.
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG);
- case ISD::SCALAR_TO_VECTOR:
- return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::VECTOR_SHUFFLE:
- return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT:
- return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::INSERT_VECTOR_ELT:
- return LowerINSERT_VECTOR_ELT(Op, DAG);
-
- // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- return LowerByteImmed(Op, DAG);
-
- // Vector and i8 multiply:
- case ISD::MUL:
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
-
- case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
-
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG, *this);
-
- case ISD::SETCC:
- return LowerSETCC(Op, DAG, *this);
-
- case ISD::TRUNCATE:
- return LowerTRUNCATE(Op, DAG);
-
- case ISD::SIGN_EXTEND:
- return LowerSIGN_EXTEND(Op, DAG);
- }
-
- return SDValue();
-}
-
-void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const
-{
-#if 0
- unsigned Opc = (unsigned) N->getOpcode();
- EVT OpVT = N->getValueType(0);
-
- switch (Opc) {
- default: {
- errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- N->dump();
- abort();
- /*NOTREACHED*/
- }
- }
-#endif
-
- /* Otherwise, return unchanged */
-}
-
-//===----------------------------------------------------------------------===//
-// Target Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
-{
-#if 0
- TargetMachine &TM = getTargetMachine();
-#endif
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- EVT NodeVT = N->getValueType(0); // The node's value type
- EVT Op0VT = Op0.getValueType(); // The first operand's result
- SDValue Result; // Initially, empty result
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::ADD: {
- SDValue Op1 = N->getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::IndirectAddr
- || Op1.getOpcode() == SPUISD::IndirectAddr) {
- // Normalize the operands to reduce repeated code
- SDValue IndirectArg = Op0, AddArg = Op1;
-
- if (Op1.getOpcode() == SPUISD::IndirectAddr) {
- IndirectArg = Op1;
- AddArg = Op0;
- }
-
- if (isa<ConstantSDNode>(AddArg)) {
- ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
- SDValue IndOp1 = IndirectArg.getOperand(1);
-
- if (CN0->isNullValue()) {
- // (add (SPUindirect <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return IndirectArg;
- } else if (isa<ConstantSDNode>(IndOp1)) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
- int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
- SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
- << "), " << CN0->getSExtValue() << ")\n"
- << "With: (SPUindirect <arg>, "
- << combinedConst << ")\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- IndirectArg, combinedValue);
- }
- }
- }
- break;
- }
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
- // (any_extend (SPUextract_elt0 <arg>)) ->
- // (SPUextract_elt0 <arg>)
- // Types must match, however...
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\nReplace: ";
- N->dump(&DAG);
- errs() << "\nWith: ";
- Op0.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- return Op0;
- }
- break;
- }
- case SPUISD::IndirectAddr: {
- if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->isNullValue()) {
- // (SPUindirect (SPUaform <addr>, 0), 0) ->
- // (SPUaform <addr>, 0)
-
- DEBUG(errs() << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
-
- return Op0;
- }
- } else if (Op0.getOpcode() == ISD::ADD) {
- SDValue Op1 = N->getOperand(1);
- if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
- // (SPUindirect (add <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
- if (CN1->isNullValue()) {
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- Op0.getOperand(0), Op0.getOperand(1));
- }
- }
- }
- break;
- }
- case SPUISD::SHL_BITS:
- case SPUISD::SHL_BYTES:
- case SPUISD::ROTBYTES_LEFT: {
- SDValue Op1 = N->getOperand(1);
-
- // Kill degenerate vector shifts:
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
- if (CN->isNullValue()) {
- Result = Op0;
- }
- }
- break;
- }
- case SPUISD::PREFSLOT2VEC: {
- switch (Op0.getOpcode()) {
- default:
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND: {
- // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
- // <arg>
- // but only if the SPUprefslot2vec and <arg> types match.
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
- SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == NodeVT) {
- Result = Op000;
- }
- }
- break;
- }
- case SPUISD::VEC2PREFSLOT: {
- // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
- // <arg>
- Result = Op0.getOperand(0);
- break;
- }
- }
- break;
- }
- }
-
- // Otherwise, return unchanged.
-#ifndef NDEBUG
- if (Result.getNode()) {
- DEBUG(errs() << "\nReplace.SPU: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
- }
-#endif
-
- return Result;
-}
-
-//===----------------------------------------------------------------------===//
-// Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-SPUTargetLowering::ConstraintType
-SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
- if (ConstraintLetter.size() == 1) {
- switch (ConstraintLetter[0]) {
- default: break;
- case 'b':
- case 'r':
- case 'f':
- case 'v':
- case 'y':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(ConstraintLetter);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-SPUTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- //FIXME: Seems like the supported constraint letters were just copied
- // from PPC, as the following doesn't correspond to the GCC docs.
- // I'm leaving it so until someone adds the corresponding lowering support.
- case 'b':
- case 'r':
- case 'f':
- case 'd':
- case 'v':
- case 'y':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() == 1) {
- // GCC RS6000 Constraint Letters
- switch (Constraint[0]) {
- case 'b': // R1-R31
- case 'r': // R0-R31
- if (VT == MVT::i64)
- return std::make_pair(0U, &SPU::R64CRegClass);
- return std::make_pair(0U, &SPU::R32CRegClass);
- case 'f':
- if (VT == MVT::f32)
- return std::make_pair(0U, &SPU::R32FPRegClass);
- if (VT == MVT::f64)
- return std::make_pair(0U, &SPU::R64FPRegClass);
- break;
- case 'v':
- return std::make_pair(0U, &SPU::GPRCRegClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//! Compute used/known bits for a SPU operand
-void
-SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth ) const {
-#if 0
- const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
-
- switch (Op.getOpcode()) {
- default:
- // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
- break;
- case CALL:
- case SHUFB:
- case SHUFFLE_MASK:
- case CNTB:
- case SPUISD::PREFSLOT2VEC:
- case SPUISD::LDRESULT:
- case SPUISD::VEC2PREFSLOT:
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_ROTL:
- case SPUISD::VEC_ROTR:
- case SPUISD::ROTBYTES_LEFT:
- case SPUISD::SELECT_MASK:
- case SPUISD::SELB:
- }
-#endif
-}
-
-unsigned
-SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth) const {
- switch (Op.getOpcode()) {
- default:
- return 1;
-
- case ISD::SETCC: {
- EVT VT = Op.getValueType();
-
- if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
- VT = MVT::i32;
- }
- return VT.getSizeInBits();
- }
- }
-}
-
-// LowerAsmOperandForConstraint
-void
-SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- // Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}
-
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode.
-bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- Type *Ty) const {
- // SPU's addresses are 256K:
- return (V > -(1 << 18) && V < (1 << 18) - 1);
-}
-
-bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
-bool
-SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The SPU target isn't yet aware of offsets.
- return false;
-}
-
-// can we compare to Imm without writing it into a register?
-bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- // ceqi, cgti, etc. all take an s10 operand
- return isInt<10>(Imm);
-}
-
-bool
-SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *) const {
-
- // A-form: 18-bit absolute address.
- if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
- return true;
-
- // D-form: reg + 14-bit offset
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
- return true;
-
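- // (The 14-bit range reflects the hardware's signed 10-bit immediate, which
- // addresses quadwords, i.e. is scaled by 16; e.g. "lqd $3, 48($1)" encodes
- // the byte offset 48 as 48/16 == 3.)
-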
- // X-form: reg+reg
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
- return true;
-
- return false;
-}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
deleted file mode 100644
index 9f1599fa6fed..000000000000
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ /dev/null
@@ -1,178 +0,0 @@
-//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Cell SPU uses to lower LLVM code into
-// a selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_ISELLOWERING_H
-#define SPU_ISELLOWERING_H
-
-#include "SPU.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-
-namespace llvm {
- namespace SPUISD {
- enum NodeType {
- // Start the numbering where the built-in ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Pseudo instructions:
- RET_FLAG, ///< Return with flag, matched by bi instruction
-
- Hi, ///< High address component (upper 16)
- Lo, ///< Low address component (lower 16)
- PCRelAddr, ///< Program counter relative address
- AFormAddr, ///< A-form address (local store)
- IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
-
- LDRESULT, ///< Load result (value, chain)
- CALL, ///< CALL instruction
- SHUFB, ///< Vector shuffle (permute)
- SHUFFLE_MASK, ///< Shuffle mask
- CNTB, ///< Count ones in bytes
- PREFSLOT2VEC, ///< Promote scalar->vector
- VEC2PREFSLOT, ///< Extract element 0
- SHL_BITS, ///< Shift quad left, by bits
- SHL_BYTES, ///< Shift quad left, by bytes
- SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
- VEC_ROTL, ///< Vector rotate left
- VEC_ROTR, ///< Vector rotate right
- ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
- ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
- SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
- SELB, ///< Select bits -> (b & mask) | (a & ~mask)
- // Markers: These aren't used to generate target-dependent nodes, but
- // are used during instruction selection.
- ADD64_MARKER, ///< i64 addition marker
- SUB64_MARKER, ///< i64 subtraction marker
- MUL64_MARKER, ///< i64 multiply marker
- LAST_SPUISD ///< Last user-defined instruction
- };
- }
-
- //! Utility functions specific to CellSPU:
- namespace SPU {
- SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
- SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
-
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
- const SPUTargetMachine &TM);
- //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form
- SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
- DebugLoc dl);
- }
-
- class SPUTargetMachine; // forward declaration.
-
- class SPUTargetLowering :
- public TargetLowering
- {
- SPUTargetMachine &SPUTM;
-
- public:
- //! The venerable constructor
- /*!
- This is where the CellSPU backend sets operation handling (i.e., legal,
- custom, expand, or promote).
- */
- SPUTargetLowering(SPUTargetMachine &TM);
-
- //! Get the target machine
- SPUTargetMachine &getSPUTargetMachine() {
- return SPUTM;
- }
-
- /// getTargetNodeName() - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- /// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual EVT getSetCCResultType(EVT VT) const;
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
-
- //! Custom lowering hooks
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- //! Custom lowering hook for nodes with illegal result types.
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
-
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth = 0) const;
-
- ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
-
- /// isLegalAddressImmediate - Return true if the integer value can be used
- /// as the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
- virtual bool isLegalAddressImmediate(GlobalValue *) const;
-
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- virtual bool isLegalICmpImmediate(int64_t Imm) const;
-
- virtual bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
deleted file mode 100644
index b495537fc2c8..000000000000
--- a/lib/Target/CellSPU/SPUInstrBuilder.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes functions that may be used with BuildMI from the
-// MachineInstrBuilder.h file to simplify generating frame and constant pool
-// references.
-//
-// For reference, the order of operands for memory references is:
-// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
-// Displacement.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRBUILDER_H
-#define SPU_INSTRBUILDER_H
-
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-namespace llvm {
-
-/// addFrameReference - This function is used to add a reference to the base of
-/// an abstract object on the stack frame of the current function. The
-/// reference uses the frame index as its base register until it is resolved,
-/// and a constant offset can be specified as well.
-///
-inline const MachineInstrBuilder&
-addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
- bool mem = true) {
- if (mem)
- return MIB.addImm(Offset).addFrameIndex(FI);
- else
- return MIB.addFrameIndex(FI).addImm(Offset);
-}
-
-} // End llvm namespace
-
-#endif
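For illustration, a minimal standalone C++ model of the operand ordering addFrameReference establishes (the Operand struct and driver below are invented for this sketch; this is not the LLVM API): when `mem` is true, the immediate displacement is appended before the frame index, matching the memory-reference operand order described in the header comment above.

#include <cstdio>
#include <vector>

struct Operand {
  enum Kind { Imm, FrameIdx } kind;
  int value;
};

// Mirror of the ordering above: (Offset, FI) for a memory reference,
// (FI, Offset) for a plain address computation.
static void addFrameRef(std::vector<Operand> &ops, int FI, int Offset = 0,
                        bool mem = true) {
  if (mem) {
    ops.push_back({Operand::Imm, Offset});
    ops.push_back({Operand::FrameIdx, FI});
  } else {
    ops.push_back({Operand::FrameIdx, FI});
    ops.push_back({Operand::Imm, Offset});
  }
}

int main() {
  std::vector<Operand> ops;
  addFrameRef(ops, /*FI=*/2, /*Offset=*/16);
  for (const Operand &op : ops)
    std::printf("%s %d\n", op.kind == Operand::Imm ? "imm" : "fi", op.value);
  return 0;
}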
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
deleted file mode 100644
index cd3f42214345..000000000000
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ /dev/null
@@ -1,320 +0,0 @@
-//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//
-// Cell SPU instruction formats. Note that these are notationally similar to
-// PowerPC, like "A-Form". But the sizes of operands and fields differ.
-
-// This was adapted from the PPC instruction formats (seemed like a good idea...)
-
-class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
- : Instruction {
- field bits<32> Inst;
-
- let Namespace = "SPU";
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Itinerary = itin;
-}
-
-// RR Format
-class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin> {
- bits<7> RA;
- bits<7> RB;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
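To make the Inst{...} ranges concrete, here is a hypothetical standalone encoder for the RR form above, assuming the IBM-style bit numbering these .td files use (bit 0 is the most significant bit of the 32-bit word); the opcode value used in the driver is the one the add-word definitions later in this diff carry.

#include <cstdint>
#include <cstdio>

// Pack an RR-form word: opcode in Inst{0-10} (the top eleven bits),
// RB in Inst{11-17}, RA in Inst{18-24}, RT in Inst{25-31}.
static uint32_t encodeRR(uint32_t opc, uint32_t rb, uint32_t ra, uint32_t rt) {
  return ((opc & 0x7FF) << 21) | ((rb & 0x7F) << 14) |
         ((ra & 0x7F) << 7) | (rt & 0x7F);
}

int main() {
  // 0x018 = 0b00000011000, the opcode the AInst class gives to "a".
  std::printf("%08x\n", encodeRR(0x018, 2, 1, 3)); // a $3, $1, $2
  return 0;
}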
-
-let RB = 0 in {
- // RR Format, where RB is zeroed (don't care):
- class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- let RA = 0 in {
- // RR Format, where RA and RB are zeroed (don't care):
- // Used for reads from status control registers (see FPSCRRr32)
- class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
- }
-}
-
-let RT = 0 in {
- // RR Format, where RT is zeroed (don't care), or as the instruction handbook
- // says, "RT is a false target." Used in "Halt if" instructions
- class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RRR Format
-class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RB;
- bits<7> RC;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-3} = opcode;
- let Inst{4-10} = RT;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RC;
-}
-
-// RI7 Format
-class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> i7;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = i7;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-// CVTIntFp Format
-class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-9} = opcode;
- let Inst{10-17} = 0;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-let RA = 0 in {
- class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- let RT = 0 in {
- // Branch instruction format (without D/E flag settings)
- class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
- : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
- pattern>
- { }
-
- let RB = 0 in {
- // Return instruction (bi, branch indirect), RA is zero (LR):
- class RETForm<string asmstr, list<dag> pattern>
- : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
- pattern>
- { }
- }
- }
-}
-
-// Branch indirect external data forms:
-class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
- : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
-{
- bits<7> Rcalldest;
-
- let Pattern = pattern;
-
- let Inst{0-10} = 0b11010101100;
- let Inst{11} = 0;
- let Inst{12-13} = DE_flag;
- let Inst{14-17} = 0b0000;
- let Inst{18-24} = Rcalldest;
- let Inst{25-31} = 0b0000000;
-}
-
-// RI10 Format
-class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<10> i10;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-7} = opcode;
- let Inst{8-17} = i10;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
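The 10-bit i10 field above implies the immediate range that predicates such as i32ImmSExt10 and i16ImmSExt10, used throughout SPUInstrInfo.td later in this diff, accept. A standalone sketch (helper names invented; assumes two's-complement arithmetic):

#include <cstdint>
#include <cstdio>

// A value fits the i10 field iff sign-extending its low ten bits
// reproduces it, i.e. it lies in [-512, 511].
static bool fitsSExt10(int32_t v) { return v >= -512 && v <= 511; }

// Decode a raw 10-bit field back to its signed value by replicating
// bit 9 upward.
static int32_t signExtend10(uint32_t field) {
  return (int32_t)(field << 22) >> 22;
}

int main() {
  std::printf("%d %d\n", fitsSExt10(-512), fitsSExt10(512)); // 1 0
  std::printf("%d\n", signExtend10(0x3FF));                  // -1
  return 0;
}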
-
-// RI10 Format, where the constant is zero (or effectively ignored by the
-// SPU)
-let i10 = 0 in {
- class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI10 Format, where RT is ignored.
-// This format is used primarily by the Halt If ... Immediate set of
-// instructions
-let RT = 0 in {
- class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI16 Format
-class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<16> i16;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-8} = opcode;
- let Inst{9-24} = i16;
- let Inst{25-31} = RT;
-}
-
-// Specialized version of the RI16 Format for unconditional branch relative and
-// branch absolute, branch and set link. Note that for branch and set link, the
-// link register doesn't have to be $lr, but this is actually hard coded into
-// the instruction pattern.
-
-let RT = 0 in {
- class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-}
-
-//===----------------------------------------------------------------------===//
-// Larger immediate formats:
-//===----------------------------------------------------------------------===//
-
-// RI18 Format
-class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<18> i18;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-6} = opcode;
- let Inst{7-24} = i18;
- let Inst{25-31} = RT;
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction formats for intrinsics:
-//===----------------------------------------------------------------------===//
-
-// RI10 Format for v8i16 intrinsics
-class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))] >;
-
-class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- i32ImmSExt10:$val))] >;
-
-// RR Format for v8i16 intrinsics
-class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-// RR Format for v4i32 intrinsics
-class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))] >;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions, like call frames:
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Pattern = pattern;
- let Inst{31-0} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Branch hint formats
-//===----------------------------------------------------------------------===//
-// For hbrr and hbra
-class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
- : Instruction {
- field bits<32> Inst;
- bits<16> i16;
- bits<9> RO;
-
- let Namespace = "SPU";
- let InOperandList = IOL;
- let OutOperandList = (outs); // no output
- let AsmString = asmstr;
- let Itinerary = BranchHints;
-
- let Inst{0-6} = opcode;
- let Inst{7-8} = RO{8-7};
- let Inst{9-24} = i16;
- let Inst{25-31} = RO{6-0};
-}
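Note how HBI16Form splits the 9-bit RO field across non-contiguous instruction bits: Inst{7-8} takes RO{8-7} and Inst{25-31} takes RO{6-0}. A hypothetical standalone encoder, under the same MSB-first bit-numbering assumption as before:

#include <cstdint>
#include <cstdio>

static uint32_t encodeHBI16(uint32_t opc, uint32_t ro, uint32_t i16) {
  uint32_t w = (opc & 0x7F) << 25;      // Inst{0-6}
  w |= ((ro >> 7) & 0x3) << 23;         // Inst{7-8}   = RO{8-7}
  w |= (i16 & 0xFFFF) << 7;             // Inst{9-24}  = i16
  w |= ro & 0x7F;                       // Inst{25-31} = RO{6-0}
  return w;
}

int main() {
  std::printf("%08x\n", encodeHBI16(0x40, 0x1AB, 0x1234));
  return 0;
}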
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
deleted file mode 100644
index b25a6397ec3a..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUInstrInfo.h"
-#include "SPUInstrBuilder.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_INSTRINFO_CTOR
-#include "SPUGenInstrInfo.inc"
-
-using namespace llvm;
-
-namespace {
- //! Predicate for an unconditional branch instruction
- inline bool isUncondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BR
- || opc == SPU::BRA
- || opc == SPU::BI);
- }
-
- //! Predicate for a conditional branch instruction
- inline bool isCondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BRNZr32
- || opc == SPU::BRNZv4i32
- || opc == SPU::BRZr32
- || opc == SPU::BRZv4i32
- || opc == SPU::BRHNZr16
- || opc == SPU::BRHNZv8i16
- || opc == SPU::BRHZr16
- || opc == SPU::BRHZv8i16);
- }
-}
-
-SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
- : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
- TM(tm),
- RI(*TM.getSubtargetImpl(), *this)
-{ /* NOP */ }
-
-/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
-/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const TargetInstrInfo *TII = TM->getInstrInfo();
- assert(TII && "No InstrInfo?");
- return new SPUHazardRecognizer(*TII);
-}
-
-unsigned
-SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-unsigned
-SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const
-{
- // We support cross register class moves for our aliases, such as R3 in any
- // reg class to any other reg class containing R3. This is required because
-// we instruction-select bitconvert i64 -> f64 as a no-op, for example, so our
- // types have no specific meaning.
-
- BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-void
-SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8;
- else
- llvm_unreachable("Unknown regclass!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc))
- .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
-}
-
-void
-SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8;
- else
- llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
-}
-
-//! Branch analysis
-/*!
- \note This code was adapted from PPC. There may be more branch analysis for
- CellSPU than what's currently done here.
- */
-bool
-SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranch(LastInst)) {
- // Check for jump tables
- if (!LastInst->getOperand(0).isMBB())
- return true;
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (isCondBranch(LastInst)) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing LastInst: ");
- DEBUG(LastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
- Cond.push_back(LastInst->getOperand(0));
- return false;
- }
- // Otherwise, don't know what this is.
- return true;
- }
-
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a conditional and unconditional branch, handle it.
- if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing SecondLastInst: ");
- DEBUG(SecondLastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
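For readers unfamiliar with the hook, a minimal standalone model of the AnalyzeBranch contract implemented above (types invented; not the LLVM API): returning true means "cannot analyze", while returning false classifies the terminators as fallthrough, a single branch, a conditional branch with fallthrough, or a conditional/unconditional pair.

#include <cstdio>

enum class Term { None, Uncond, Cond };

// Returns true on "can't analyze", mirroring the convention above.
static bool analyze(Term last, Term secondLast,
                    bool &hasTBB, bool &hasFBB, bool &hasCond) {
  hasTBB = hasFBB = hasCond = false;
  if (secondLast == Term::None) {
    if (last == Term::None) return false;            // pure fallthrough
    if (last == Term::Uncond) { hasTBB = true; return false; }
    hasTBB = hasCond = true; return false;           // cond + fallthrough
  }
  if (secondLast == Term::Cond && last == Term::Uncond) {
    hasTBB = hasFBB = hasCond = true;                // cond then uncond
    return false;
  }
  if (secondLast == Term::Uncond && last == Term::Uncond) {
    hasTBB = true;                                   // second branch is dead
    return false;
  }
  return true;                                       // anything else: give up
}

int main() {
  bool t, f, c;
  bool unknown = analyze(Term::Uncond, Term::Cond, t, f, c);
  std::printf("analyzed=%d TBB=%d FBB=%d Cond=%d\n", !unknown, t, f, c);
  return 0;
}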
-
-// Search MBB for branch hint labels and branch hint ops.
-static void removeHBR(MachineBasicBlock &MBB) {
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){
- if (I->getOpcode() == SPU::HBRA ||
- I->getOpcode() == SPU::HBR_LABEL){
- I=MBB.erase(I);
- if (I == MBB.end())
- break;
- }
- }
-}
-
-unsigned
-SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- removeHBR(MBB);
- if (I == MBB.begin())
- return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (!isCondBranch(I) && !isUncondBranch(I))
- return 0;
-
- // Remove the first branch.
- DEBUG(errs() << "Removing branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- I = MBB.end();
- if (I == MBB.begin())
- return 1;
-
- --I;
- if (!(isCondBranch(I) || isUncondBranch(I)))
- return 1;
-
- // Remove the second branch.
- DEBUG(errs() << "Removing second branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- return 2;
-}
-
-/** Find the optimal position for a hint-branch instruction in a basic block.
- * This should take into account:
- * - the branch hint delays
- * - congestion of the memory bus
- * - dual-issue scheduling (i.e., avoiding the insertion of nops)
- * The current implementation is rather simplistic.
- */
-static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
-{
- MachineBasicBlock::iterator J = MBB.end();
- for (int i = 0; i < 8; i++) {
- if (J == MBB.begin())
- return J;
- --J;
- }
- return J;
-}
-
-unsigned
-SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "SPU branch conditions have two components!");
-
- MachineInstrBuilder MIB;
- // TODO: make a more accurate algorithm.
- bool haveHBR = MBB.size() > 8;
-
- removeHBR(MBB);
- MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
- // Add a label just before the branch
- if (haveHBR)
- MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
-
- // One-way branch.
- if (FBB == 0) {
- if (Cond.empty()) {
- // Unconditional branch
- MIB = BuildMI(&MBB, DL, get(SPU::BR));
- MIB.addMBB(TBB);
-
- DEBUG(errs() << "Inserted one-way uncond branch: ");
- DEBUG((*MIB).dump());
-
- // Basic blocks here have just one branch, so it is safe to add the hint at
- // its end.
- if (haveHBR) {
- MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
- } else {
- // Conditional branch
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
-
- if (haveHBR) {
- MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
-
- DEBUG(errs() << "Inserted one-way cond branch: ");
- DEBUG((*MIB).dump());
- }
- return 1;
- } else {
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
-
- // Two-way Conditional Branch.
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
- MIB2.addMBB(FBB);
-
- if (haveHBR) {
- MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(FBB);
- }
-
- DEBUG(errs() << "Inserted conditional branch: ");
- DEBUG((*MIB).dump());
- DEBUG(errs() << "part 2: ");
- DEBUG((*MIB2).dump());
- return 2;
- }
-}
-
-//! Reverses a branch's condition, returning false on success.
-bool
-SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
- const {
- // Pretty brainless way of inverting the condition, but it works, considering
- // there are only two conditions...
- static struct {
- unsigned Opc; //! The incoming opcode
- unsigned RevCondOpc; //! The reversed condition opcode
- } revconds[] = {
- { SPU::BRNZr32, SPU::BRZr32 },
- { SPU::BRNZv4i32, SPU::BRZv4i32 },
- { SPU::BRZr32, SPU::BRNZr32 },
- { SPU::BRZv4i32, SPU::BRNZv4i32 },
- { SPU::BRHNZr16, SPU::BRHZr16 },
- { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
- { SPU::BRHZr16, SPU::BRHNZr16 },
- { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
- };
-
- unsigned Opc = unsigned(Cond[0].getImm());
- // Pretty dull mapping between the two conditions that SPU can generate:
- for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
- if (revconds[i].Opc == Opc) {
- Cond[0].setImm(revconds[i].RevCondOpc);
- return false;
- }
- }
-
- return true;
-}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
deleted file mode 100644
index 85e5821aefa1..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the CellSPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRUCTIONINFO_H
-#define SPU_INSTRUCTIONINFO_H
-
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "SPUGenInstrInfo.inc"
-
-namespace llvm {
- //! Cell SPU instruction information class
- class SPUInstrInfo : public SPUGenInstrInfo {
- SPUTargetMachine &TM;
- const SPURegisterInfo RI;
- public:
- explicit SPUInstrInfo(SPUTargetMachine &tm);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
-
- ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- //! Store a register to a stack slot, based on its register class.
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Load a register from a stack slot, based on its register class.
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Reverses a branch's condition, returning false on success.
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
deleted file mode 100644
index 117acd736aaa..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ /dev/null
@@ -1,4484 +0,0 @@
-//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instructions:
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TODO Items (not urgent today, but would be nice, low priority)
-//
-// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
-// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
-// in 16-bit and 32-bit constants and reduce instruction count.
-//===----------------------------------------------------------------------===//
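The byte-splat check the TODO above asks for is simple to state; a sketch (function name invented):

#include <cstdint>
#include <cstdio>

// A 32-bit constant is ANDBI/ORBI-friendly when it is one byte b
// repeated as "bbbb"; multiplying b by 0x01010101 replicates it.
static bool isByteSplat32(uint32_t v, uint8_t &b) {
  b = static_cast<uint8_t>(v & 0xFF);
  return v == 0x01010101u * b;
}

int main() {
  uint8_t b;
  std::printf("%d\n", isByteSplat32(0xABABABABu, b)); // 1 (b = 0xAB)
  std::printf("%d\n", isByteSplat32(0xABABAB00u, b)); // 0
  return 0;
}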
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions:
-//===----------------------------------------------------------------------===//
-
-let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
- def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKDOWN",
- [(callseq_start timm:$amt)]>;
- def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKUP",
- [(callseq_end timm:$amt)]>;
- def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
- "$targ:\t${:comment}branch hint target",[ ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Loads:
-// NB: The ordering is actually important, since the instruction selection
-// will try each of the instructions in sequence, i.e., the D-form first with
-// the 10-bit displacement, then the A-form with the 16-bit displacement, and
-// finally the X-form with register-register addressing.
-//===----------------------------------------------------------------------===//
-
-let canFoldAsLoad = 1 in {
- class LoadDFormVec<ValueType vectype>
- : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
- { }
-
- class LoadDForm<RegisterClass rclass>
- : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load dform_addr:$src))]>
- { }
-
- multiclass LoadDForms
- {
- def v16i8: LoadDFormVec<v16i8>;
- def v8i16: LoadDFormVec<v8i16>;
- def v4i32: LoadDFormVec<v4i32>;
- def v2i64: LoadDFormVec<v2i64>;
- def v4f32: LoadDFormVec<v4f32>;
- def v2f64: LoadDFormVec<v2f64>;
-
- def r128: LoadDForm<GPRC>;
- def r64: LoadDForm<R64C>;
- def r32: LoadDForm<R32C>;
- def f32: LoadDForm<R32FP>;
- def f64: LoadDForm<R64FP>;
- def r16: LoadDForm<R16C>;
- def r8: LoadDForm<R8C>;
- }
-
- class LoadAFormVec<ValueType vectype>
- : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
- { }
-
- class LoadAForm<RegisterClass rclass>
- : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load aform_addr:$src))]>
- { }
-
- multiclass LoadAForms
- {
- def v16i8: LoadAFormVec<v16i8>;
- def v8i16: LoadAFormVec<v8i16>;
- def v4i32: LoadAFormVec<v4i32>;
- def v2i64: LoadAFormVec<v2i64>;
- def v4f32: LoadAFormVec<v4f32>;
- def v2f64: LoadAFormVec<v2f64>;
-
- def r128: LoadAForm<GPRC>;
- def r64: LoadAForm<R64C>;
- def r32: LoadAForm<R32C>;
- def f32: LoadAForm<R32FP>;
- def f64: LoadAForm<R64FP>;
- def r16: LoadAForm<R16C>;
- def r8: LoadAForm<R8C>;
- }
-
- class LoadXFormVec<ValueType vectype>
- : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
- { }
-
- class LoadXForm<RegisterClass rclass>
- : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load xform_addr:$src))]>
- { }
-
- multiclass LoadXForms
- {
- def v16i8: LoadXFormVec<v16i8>;
- def v8i16: LoadXFormVec<v8i16>;
- def v4i32: LoadXFormVec<v4i32>;
- def v2i64: LoadXFormVec<v2i64>;
- def v4f32: LoadXFormVec<v4f32>;
- def v2f64: LoadXFormVec<v2f64>;
-
- def r128: LoadXForm<GPRC>;
- def r64: LoadXForm<R64C>;
- def r32: LoadXForm<R32C>;
- def f32: LoadXForm<R32FP>;
- def f64: LoadXForm<R64FP>;
- def r16: LoadXForm<R16C>;
- def r8: LoadXForm<R8C>;
- }
-
- defm LQA : LoadAForms;
- defm LQD : LoadDForms;
- defm LQX : LoadXForms;
-
-/* Load quadword, PC relative: Not much use at this point in time.
- Might be of use later for relocatable code. It's effectively the
- same as LQA, but uses PC-relative addressing.
- def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
- "lqr\t$rT, $disp", LoadStore,
- [(set VECREG:$rT, (load iaddr:$disp))]>;
- */
-}
-
-//===----------------------------------------------------------------------===//
-// Stores:
-//===----------------------------------------------------------------------===//
-class StoreDFormVec<ValueType vectype>
- : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), dform_addr:$src)]>
-{ }
-
-class StoreDForm<RegisterClass rclass>
- : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, dform_addr:$src)]>
-{ }
-
-multiclass StoreDForms
-{
- def v16i8: StoreDFormVec<v16i8>;
- def v8i16: StoreDFormVec<v8i16>;
- def v4i32: StoreDFormVec<v4i32>;
- def v2i64: StoreDFormVec<v2i64>;
- def v4f32: StoreDFormVec<v4f32>;
- def v2f64: StoreDFormVec<v2f64>;
-
- def r128: StoreDForm<GPRC>;
- def r64: StoreDForm<R64C>;
- def r32: StoreDForm<R32C>;
- def f32: StoreDForm<R32FP>;
- def f64: StoreDForm<R64FP>;
- def r16: StoreDForm<R16C>;
- def r8: StoreDForm<R8C>;
-}
-
-class StoreAFormVec<ValueType vectype>
- : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), aform_addr:$src)]>;
-
-class StoreAForm<RegisterClass rclass>
- : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, aform_addr:$src)]>;
-
-multiclass StoreAForms
-{
- def v16i8: StoreAFormVec<v16i8>;
- def v8i16: StoreAFormVec<v8i16>;
- def v4i32: StoreAFormVec<v4i32>;
- def v2i64: StoreAFormVec<v2i64>;
- def v4f32: StoreAFormVec<v4f32>;
- def v2f64: StoreAFormVec<v2f64>;
-
- def r128: StoreAForm<GPRC>;
- def r64: StoreAForm<R64C>;
- def r32: StoreAForm<R32C>;
- def f32: StoreAForm<R32FP>;
- def f64: StoreAForm<R64FP>;
- def r16: StoreAForm<R16C>;
- def r8: StoreAForm<R8C>;
-}
-
-class StoreXFormVec<ValueType vectype>
- : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), xform_addr:$src)]>
-{ }
-
-class StoreXForm<RegisterClass rclass>
- : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, xform_addr:$src)]>
-{ }
-
-multiclass StoreXForms
-{
- def v16i8: StoreXFormVec<v16i8>;
- def v8i16: StoreXFormVec<v8i16>;
- def v4i32: StoreXFormVec<v4i32>;
- def v2i64: StoreXFormVec<v2i64>;
- def v4f32: StoreXFormVec<v4f32>;
- def v2f64: StoreXFormVec<v2f64>;
-
- def r128: StoreXForm<GPRC>;
- def r64: StoreXForm<R64C>;
- def r32: StoreXForm<R32C>;
- def f32: StoreXForm<R32FP>;
- def f64: StoreXForm<R64FP>;
- def r16: StoreXForm<R16C>;
- def r8: StoreXForm<R8C>;
-}
-
-defm STQD : StoreDForms;
-defm STQA : StoreAForms;
-defm STQX : StoreXForms;
-
-/* Store quadword, PC relative: Not much use at this point in time. Might
- be useful for relocatable code.
-def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
- "stqr\t$rT, $disp", LoadStore,
- [(store VECREG:$rT, iaddr:$disp)]>;
-*/
-
-//===----------------------------------------------------------------------===//
-// Generate Controls for Insertion:
-//===----------------------------------------------------------------------===//
-
-def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cbd\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cbx\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "chd\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
- "chx\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-//===----------------------------------------------------------------------===//
-// Constant formation:
-//===----------------------------------------------------------------------===//
-
-def ILHv8i16:
- RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
-
-def ILHr16:
- RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R16C:$rT, immSExt16:$val)]>;
-
-// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
-// the right constant")
-def ILHr8:
- RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R8C:$rT, immSExt8:$val)]>;
-
-// IL does sign extension!
-
-class ILInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmediateLoad
-{
- def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
- def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
-
- // TODO: Need v2f64, v4f32
-
- def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
- def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
- def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
- def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
-}
-
-defm IL : ImmediateLoad;
-
-class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadHalfwordUpper
-{
- def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
- def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
-
- def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
- def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
-
- // Loads the high portion of an address
- def hi: ILHURegInst<R32C, symbolHi, hi16>;
-
- // Used in custom lowering constant SFP loads:
- def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
-}
-
-defm ILHU : ImmLoadHalfwordUpper;
-
-// Immediate load address (can also be used to load 18-bit unsigned constants,
-// see the zext 16->32 pattern)
-
-class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
- RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
- LoadNOP, pattern>;
-
-class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILAInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILAInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadAddress
-{
- def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
- def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
-
- def r64: ILARegInst<R64C, u18imm_i64, imm18>;
- def r32: ILARegInst<R32C, u18imm, imm18>;
- def f32: ILARegInst<R32FP, f18imm, fpimm18>;
- def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
-
- def hi: ILARegInst<R32C, symbolHi, imm18>;
- def lo: ILARegInst<R32C, symbolLo, imm18>;
-
- def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
- [(set R32C:$rT, imm18:$val)]>;
-}
-
-defm ILA : ImmLoadAddress;
-
-// Immediate OR, Halfword Lower: The "other" part of loading large constants
-// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
-// Note that these are really two-operand instructions, but they're encoded
-// as three operands with the first two tied to each other.
-
-class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
- ImmLoad, pattern>,
- RegConstraint<"$rS = $rT">,
- NoEncode<"$rS">;
-
-class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
- [/* no pattern */]>;
-
-class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
- [/* no pattern */]>;
-
-multiclass ImmOrHalfwordLower
-{
- def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
- def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
-
- def r32: IOHLRegInst<R32C, i32imm>;
- def f32: IOHLRegInst<R32FP, f32imm>;
-
- def lo: IOHLRegInst<R32C, symbolLo>;
-}
-
-defm IOHL: ImmOrHalfwordLower;
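A standalone model of the ILHU/IOHL pairing described above: ILHU plants the upper 16 bits of a register with the lower half zeroed, IOHL then ORs in the lower 16 bits, so an arbitrary 32-bit constant costs exactly two instructions.

#include <cstdint>
#include <cstdio>

static uint32_t ilhu(uint16_t hi) { return static_cast<uint32_t>(hi) << 16; }
static uint32_t iohl(uint32_t rS, uint16_t lo) { return rS | lo; }

int main() {
  // ilhu $3, 0x1234 ; iohl $3, 0x5678  =>  $3 = 0x12345678
  std::printf("%08x\n", iohl(ilhu(0x1234), 0x5678));
  return 0;
}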
-
-// Form select mask for bytes using immediate, used in conjunction with the
-// SELB instruction:
-
-class FSMBIVec<ValueType vectype>:
- RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
- "fsmbi\t$rT, $val",
- SelectOp,
- [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
-
-multiclass FormSelectMaskBytesImm
-{
- def v16i8: FSMBIVec<v16i8>;
- def v8i16: FSMBIVec<v8i16>;
- def v4i32: FSMBIVec<v4i32>;
- def v2i64: FSMBIVec<v2i64>;
-}
-
-defm FSMBI : FormSelectMaskBytesImm;
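A scalar model of the FSMBI/SELB pairing: FSMBI expands a 16-bit immediate into a byte mask (one 0xFF byte per set bit), and SELB then computes (b & mask) | (a & ~mask), per the ISD node comment earlier in this diff. The byte ordering below assumes the immediate's most significant bit maps to byte 0.

#include <cstdint>
#include <cstdio>

static void fsmbi(uint16_t imm, uint8_t mask[16]) {
  for (int i = 0; i < 16; ++i)
    mask[i] = ((imm >> (15 - i)) & 1) ? 0xFF : 0x00;
}

static void selb(const uint8_t a[16], const uint8_t b[16],
                 const uint8_t m[16], uint8_t t[16]) {
  for (int i = 0; i < 16; ++i)
    t[i] = (b[i] & m[i]) | (a[i] & ~m[i]);  // select b where mask is set
}

int main() {
  uint8_t a[16], b[16], m[16], t[16];
  for (int i = 0; i < 16; ++i) { a[i] = 0x11; b[i] = 0x22; }
  fsmbi(0xFF00, m); // select b for the first eight bytes
  selb(a, b, m, t);
  std::printf("%02x %02x\n", t[0], t[15]); // 22 11
  return 0;
}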
-
-// fsmb: Form select mask for bytes. N.B.: the input operand, $rA, is 16 bits wide
-class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMBVecInst<ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskBits {
- def v16i8_r16: FSMBRegInst<R16C, v16i8>;
- def v16i8: FSMBVecInst<v16i8>;
-}
-
-defm FSMB: FormSelectMaskBits;
-
-// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
-// only 8 bits wide (even though it is passed in as 16 bits here)
-
-class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMHVecInst<ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskHalfword {
- def v8i16_r16: FSMHRegInst<R16C, v8i16>;
- def v8i16: FSMHVecInst<v8i16>;
-}
-
-defm FSMH: FormSelectMaskHalfword;
-
-// fsm: Form select mask for words. Like the other fsm* instructions,
-// only the lower 4 bits of $rA are significant.
-
-class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMRegInst<ValueType vectype, RegisterClass rclass>:
- FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMVecInst<ValueType vectype>:
- FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskWord {
- def v4i32: FSMVecInst<v4i32>;
-
- def r32 : FSMRegInst<v4i32, R32C>;
- def r16 : FSMRegInst<v4i32, R16C>;
-}
-
-defm FSM : FormSelectMaskWord;
-
-// Special case when used for i64 math operations
-multiclass FormSelectMaskWord64 {
- def r32 : FSMRegInst<v2i64, R32C>;
- def r16 : FSMRegInst<v2i64, R16C>;
-}
-
-defm FSM64 : FormSelectMaskWord64;
-
-//===----------------------------------------------------------------------===//
-// Integer and Logical Operations:
-//===----------------------------------------------------------------------===//
-
-def AHv8i16:
- RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
-
-def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (AHv8i16 VECREG:$rA, VECREG:$rB)>;
-
-def AHr16:
- RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
-
-def AHIvec:
- RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def AHIr16:
- RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
-
-// v4i32, i32 add instruction:
-
-class AInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000011000, OOL, IOL,
- "a\t$rT, $rA, $rB", IntegerOp,
- pattern>;
-
-class AVecInst<ValueType vectype>:
- AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ARegInst<RegisterClass rclass>:
- AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
-
-multiclass AddInstruction {
- def v4i32: AVecInst<v4i32>;
- def v16i8: AVecInst<v16i8>;
- def r32: ARegInst<R32C>;
-}
-
-defm A : AddInstruction;
-
-class AIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00111000, OOL, IOL,
- "ai\t$rT, $rA, $val", IntegerOp,
- pattern>;
-
-class AIVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
-
-class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [/* no pattern */]>;
-
-class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
-
-// This is used to add epsilons to floating point numbers in the f32 fdiv code:
-class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-
-multiclass AddImmediate {
- def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
-
- def r32: AIRegInst<R32C, i32ImmSExt10>;
-
- def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
- def f32: AIFPInst<R32FP, i32ImmSExt10>;
-}
-
-defm AI : AddImmediate;
-
-def SFHvec:
- RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def SFHr16:
- RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
-
-def SFHIvec:
- RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
- (v8i16 VECREG:$rA)))]>;
-
-def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
-
-def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
-
-
-def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
-
-def SFIvec:
- RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
- (v4i32 VECREG:$rA)))]>;
-
-def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
- (ins R32C:$rA, s10imm_i32:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
-
-// ADDX: only available in vector form, doesn't match a pattern.
-class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000010110, OOL, IOL,
- "addx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ADDXVecInst<ValueType vectype>:
- ADDXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class ADDXRegInst<RegisterClass rclass>:
- ADDXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass AddExtended {
- def v2i64 : ADDXVecInst<v2i64>;
- def v4i32 : ADDXVecInst<v4i32>;
- def r64 : ADDXRegInst<R64C>;
- def r32 : ADDXRegInst<R32C>;
-}
-
-defm ADDX : AddExtended;
-
-// CG: Generate carry for add
-class CGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000011000, OOL, IOL,
- "cg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class CGVecInst<ValueType vectype>:
- CGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class CGRegInst<RegisterClass rclass>:
- CGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass CarryGenerate {
- def v2i64 : CGVecInst<v2i64>;
- def v4i32 : CGVecInst<v4i32>;
- def r64 : CGRegInst<R64C>;
- def r32 : CGRegInst<R32C>;
-}
-
-defm CG : CarryGenerate;
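CG and ADDX together implement the double-word additions that the ADD64_MARKER node near the top of this diff stands for. A standalone model of the pairing (CG computes the carry out of the low words; ADDX folds it into the high-word sum, consuming only the least significant bit of its carry operand):

#include <cstdint>
#include <cstdio>

static uint32_t cg(uint32_t a, uint32_t b) { return a + b < a ? 1 : 0; }
static uint32_t addx(uint32_t a, uint32_t b, uint32_t c) {
  return a + b + (c & 1);
}

int main() {
  uint64_t x = 0x00000001FFFFFFFFull, y = 1;
  uint32_t lo = uint32_t(x) + uint32_t(y);
  uint32_t hi = addx(uint32_t(x >> 32), uint32_t(y >> 32),
                     cg(uint32_t(x), uint32_t(y)));
  std::printf("%08x%08x\n", hi, lo); // 0000000200000000
  return 0;
}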
-
-// SFX: Subtract from, extended. This is used in conjunction with BG to
-// subtract with carry (borrow, in this case)
-class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010110, OOL, IOL,
- "sfx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class SFXVecInst<ValueType vectype>:
- SFXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class SFXRegInst<RegisterClass rclass>:
- SFXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass SubtractExtended {
- def v2i64 : SFXVecInst<v2i64>;
- def v4i32 : SFXVecInst<v4i32>;
- def r64 : SFXRegInst<R64C>;
- def r32 : SFXRegInst<R32C>;
-}
-
-defm SFX : SubtractExtended;
-
-// BG: only available in vector form, doesn't match a pattern.
-class BGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000010000, OOL, IOL,
- "bg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class BGVecInst<ValueType vectype>:
- BGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class BGRegInst<RegisterClass rclass>:
- BGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass BorrowGenerate {
- def v4i32 : BGVecInst<v4i32>;
- def v2i64 : BGVecInst<v2i64>;
- def r64 : BGRegInst<R64C>;
- def r32 : BGRegInst<R32C>;
-}
-
-defm BG : BorrowGenerate;
-
-// BGX: Borrow generate, extended.
-def BGXvec:
- RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "bgx\t$rT, $rA, $rB", IntegerOp,
- []>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-// Halfword multiply variants:
-// N.B.: These can be used to build up larger quantities (16 x 16 -> 32)
-
-def MPYv8i16:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYr16:
- RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
-
-// Unsigned 16-bit multiply:
-
-class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00110011110, OOL, IOL,
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYUv4i32:
- MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYUr16:
- MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
-
-def MPYUr32:
- MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-// mpyi: multiply 16 x s10imm -> 32-bit result.
-
-class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00101110, OOL, IOL,
- "mpyi\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYIvec:
- MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
-def MPYIr16:
- MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
-
-// mpyui: same issues as the other multiplies; this doesn't match a pattern
-// either, but may be used during target DAG selection or lowering
-
-class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10101110, OOL, IOL,
- "mpyui\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYUIvec:
- MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- []>;
-
-def MPYUIr16:
- MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- []>;
-
-// mpya: 16 x 16 + 32 -> 32-bit result (the addend $rC is a full word)
-class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b0011, OOL, IOL,
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- pattern>;
-
-def MPYAv4i32:
- MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4i32 VECREG:$rT),
- (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))),
- (v4i32 VECREG:$rC)))]>;
-
-def MPYAr32:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sext:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sextinreg:
- MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
- (sext_inreg R32C:$rB, i16)),
- R32C:$rC))]>;
-
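For reference, a minimal C model of the mpya semantics that the MPYAr32
pattern above expresses (the helper name is illustrative, not from this file):

    #include <stdint.h>

    /* mpya: 16 x 16 signed multiply plus a 32-bit addend, as in MPYAr32. */
    static int32_t mpya_r32(int16_t ra, int16_t rb, int32_t rc) {
        return (int32_t)ra * (int32_t)rb + rc;
    }
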
-// mpyh: multiply high, used to synthesize 32-bit multiplies
-class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10100011110, OOL, IOL,
- "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYHv4i32:
- MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYHr32:
- MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
-
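Since mpy/mpyh only produce partial products, a full 32-bit multiply is
assembled from one mpyu and two mpyh results. A minimal C sketch of that
decomposition, assuming the usual SPU sequence (helper names are illustrative):

    #include <stdint.h>

    static uint32_t mpyu_model(uint32_t a, uint32_t b) {  /* lo16(a) * lo16(b) */
        return (a & 0xFFFFu) * (b & 0xFFFFu);
    }

    static uint32_t mpyh_model(uint32_t a, uint32_t b) {  /* (hi16(a) * lo16(b)) << 16 */
        return ((a >> 16) * (b & 0xFFFFu)) << 16;
    }

    /* a * b mod 2^32 = lo*lo + ((hi(a)*lo(b) + lo(a)*hi(b)) << 16) */
    static uint32_t mul32(uint32_t a, uint32_t b) {
        return mpyu_model(a, b) + mpyh_model(a, b) + mpyh_model(b, a);
    }
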
-// mpys: multiply high and shift right (returns the upper 16 bits of a
-// 16 x 16 multiply, sign-extended to 32 bits).
-
-class MPYSInst<dag OOL, dag IOL>:
- RRForm<0b11100011110, OOL, IOL,
- "mpys\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYSv4i32:
- MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYSr16:
- MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
-
-// mpyhh: multiply high-high (returns the 32-bit result from multiplying
-// the upper 16 bits of $rA and $rB)
-
-class MPYHHInst<dag OOL, dag IOL>:
- RRForm<0b01100011110, OOL, IOL,
- "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHv8i16:
- MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHr32:
- MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhha: Multiply high-high, add to $rT:
-
-class MPYHHAInst<dag OOL, dag IOL>:
- RRForm<0b01100010110, OOL, IOL,
- "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAvec:
- MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAr32:
- MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhu: Multiply high-high, unsigned, e.g.:
-//
-// +-------+-------+ +-------+-------+ +---------+
-// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
-// +-------+-------+ +-------+-------+ +---------+
-//
-// where a0, b0 are the upper 16 bits of the 32-bit word
-
-class MPYHHUInst<dag OOL, dag IOL>:
- RRForm<0b01110011110, OOL, IOL,
- "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHUv4i32:
- MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHUr32:
- MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhau: Multiply high-high, unsigned, add to $rT (the unsigned analogue
-// of mpyhha)
-
-class MPYHHAUInst<dag OOL, dag IOL>:
- RRForm<0b01110010110, OOL, IOL,
- "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAUvec:
- MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAUr32:
- MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// clz: Count leading zeroes
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
- IntegerOp, pattern>;
-
-class CLZRegInst<RegisterClass rclass>:
- CLZInst<(outs rclass:$rT), (ins rclass:$rA),
- [(set rclass:$rT, (ctlz rclass:$rA))]>;
-
-class CLZVecInst<ValueType vectype>:
- CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
-
-multiclass CountLeadingZeroes {
- def v4i32 : CLZVecInst<v4i32>;
- def r32 : CLZRegInst<R32C>;
-}
-
-defm CLZ : CountLeadingZeroes;
-
-// cntb: Count ones in bytes (aka "population count")
-//
-// NOTE: This instruction is really a vector instruction, but the custom
-// lowering code uses it in unorthodox ways to support CTPOP for other
-// data types!
-
-def CNTBv16i8:
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
-
-def CNTBv8i16 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
-
-def CNTBv4i32 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
-
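The lowering idea behind the note above, as a scalar C sketch: cntb yields a
per-byte population count, and summing the byte counts gives CTPOP for wider
types (a model only; the deleted lowering code itself is not shown here):

    #include <stdint.h>

    /* Per-byte population count, as cntb computes in each byte slot. */
    static uint8_t cntb_byte(uint8_t x) {
        uint8_t n = 0;
        while (x) { n += x & 1u; x >>= 1; }
        return n;
    }

    /* CTPOP for i32: count each byte, then sum the four counts. */
    static uint32_t ctpop32(uint32_t x) {
        return cntb_byte((uint8_t)x) + cntb_byte((uint8_t)(x >> 8)) +
               cntb_byte((uint8_t)(x >> 16)) + cntb_byte((uint8_t)(x >> 24));
    }
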
-// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
-// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
-// slots 1-3.
-//
-// Note: This instruction "pairs" with the fsmb instruction for all of the
-// various types defined here.
-//
-// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
-// a vector or register.
-
-class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
-
-class GBBRegInst<RegisterClass rclass, ValueType vectype>:
- GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBBVecInst<ValueType vectype>:
- GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsFromBytes {
- def v16i8_r32: GBBRegInst<R32C, v16i8>;
- def v16i8_r16: GBBRegInst<R16C, v16i8>;
- def v16i8: GBBVecInst<v16i8>;
-}
-
-defm GBB: GatherBitsFromBytes;
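A scalar C sketch of the gbb semantics described above, modeling the vector as
a 16-byte array (the byte-to-bit ordering, with byte 0 supplying the most
significant of the 16 gathered bits, is an assumption):

    #include <stdint.h>

    /* gbb: gather the low-order bit of each of the 16 bytes into a
       16-bit quantity; the upper bits of slot 0 and slots 1-3 are zero. */
    static uint16_t gbb_model(const uint8_t ra[16]) {
        uint16_t mask = 0;
        for (int i = 0; i < 16; i++)
            mask = (uint16_t)((mask << 1) | (ra[i] & 1u));
        return mask;
    }
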
-
-// gbh: Gather all low order bits from each halfword in $rA into a single
-// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
-// and slots 1-3 also set to 0.
-//
-// See notes for GBBInst, above.
-
-class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBHRegInst<RegisterClass rclass, ValueType vectype>:
- GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBHVecInst<ValueType vectype>:
- GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsHalfword {
- def v8i16_r32: GBHRegInst<R32C, v8i16>;
- def v8i16_r16: GBHRegInst<R16C, v8i16>;
- def v8i16: GBHVecInst<v8i16>;
-}
-
-defm GBH: GatherBitsHalfword;
-
-// gb: Gather all low order bits from each word in $rA into a single
-// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
-// as well as slots 1-3.
-//
-// See notes for gbb, above.
-
-class GBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBRegInst<RegisterClass rclass, ValueType vectype>:
- GBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBVecInst<ValueType vectype>:
- GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsWord {
- def v4i32_r32: GBRegInst<R32C, v4i32>;
- def v4i32_r16: GBRegInst<R16C, v4i32>;
- def v4i32: GBVecInst<v4i32>;
-}
-
-defm GB: GatherBitsWord;
-
-// avgb: average bytes
-def AVGB:
- RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "avgb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// absdb: absolute difference of bytes
-def ABSDB:
- RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "absdb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// sumb: sum bytes into halfwords
-def SUMB:
- RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sumb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// Sign extension operations:
-class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL,
- "xsbh\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
- pattern>;
-
-multiclass ExtendByteHalfword {
- def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [
- /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
- def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
- def r16: XSBHInRegInst<R16C,
- [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
-
-  // 32-bit form of XSBH: used to sign-extend 8-bit quantities all the way to
-  // 32-bit quantities via a 32-bit register (see the sext 8->32 pattern
-  // below). Intentionally doesn't match a pattern, because the full
-  // sext 8->32 sequence needs the extra XSHWr32 to do the work.
- def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
-
- // Same as the 32-bit version, but for i64
- def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSBH : ExtendByteHalfword;
-
-// Sign extend halfwords to words:
-
-class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
- IntegerOp, pattern>;
-
-class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
- [(set (out_vectype VECREG:$rDest),
- (sext (in_vectype VECREG:$rSrc)))]>;
-
-class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
- pattern>;
-
-class XSHWRegInst<RegisterClass rclass>:
- XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
- [(set rclass:$rDest, (sext R16C:$rSrc))]>;
-
-multiclass ExtendHalfwordWord {
- def v4i32: XSHWVecInst<v8i16, v4i32>;
-
- def r16: XSHWRegInst<R32C>;
-
- def r32: XSHWInRegInst<R32C,
- [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
- def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSHW : ExtendHalfwordWord;
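This is the chain the XSBH/XSHW comments above describe: sign-extending i8 to
i32 happens as two in-register steps. The equivalent arithmetic in C (a
sketch, not the deleted selection code):

    #include <stdint.h>

    /* sext i8 -> i32 as two steps, mirroring xsbh followed by xshw. */
    static int32_t sext8to32(uint32_t r) {
        int16_t h = (int8_t)(r & 0xFFu);  /* xsbh: extend the low byte to 16 bits */
        return (int32_t)h;                /* xshw: extend the low half to 32 bits */
    }
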
-
-// Sign-extend words to doublewords (32->64 bits)
-
-class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [/*(set (out_vectype VECREG:$rDst),
- (sext (out_vectype VECREG:$rSrc)))*/]>;
-
-class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
- XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
- [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
-
-multiclass ExtendWordToDoubleWord {
- def v2i64: XSWDVecInst<v4i32, v2i64>;
- def r64: XSWDRegInst<R32C, R64C>;
-
- def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
- [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
-}
-
-defm XSWD : ExtendWordToDoubleWord;
-
-// AND operations
-
-class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDVecInst<ValueType vectype>:
- ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ANDRegInst<RegisterClass rclass>:
- ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseAnd
-{
- def v16i8: ANDVecInst<v16i8>;
- def v8i16: ANDVecInst<v8i16>;
- def v4i32: ANDVecInst<v4i32>;
- def v2i64: ANDVecInst<v2i64>;
-
- def r128: ANDRegInst<GPRC>;
- def r64: ANDRegInst<R64C>;
- def r32: ANDRegInst<R32C>;
- def r16: ANDRegInst<R16C>;
- def r8: ANDRegInst<R8C>;
-
- //===---------------------------------------------
- // Special instructions to perform the fabs instruction
- def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- //===---------------------------------------------
-
-  // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
-  // quantities -- see the 16->32 zext pattern.
-  //
-  // This pattern is somewhat artificial: it could match compiler-generated
-  // code in principle, but is unlikely to do so in practice.
-
- def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
-}
-
-defm AND : BitwiseAnd;
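The fabs32/fabs64/fabsvec forms above implement fabs purely as a bit
operation: AND away the IEEE-754 sign bit. A minimal C sketch of the trick
(memcpy is used for the type pun):

    #include <stdint.h>
    #include <string.h>

    /* fabs via AND: clear the sign bit with a 0x7FFFFFFF mask, which is
       what the fabs32 form does with the mask supplied in its R32C operand. */
    static float fabs_via_and(float f) {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        bits &= 0x7FFFFFFFu;
        memcpy(&f, &bits, sizeof f);
        return f;
    }
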
-
-
-def vnot_cell_conv : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
-
-// N.B.: vnot_cell_conv is a special target selection pattern fragment in
-// which we expect a bit_convert on the constant. Bear in mind that LLVM
-// translates "not <reg>" to "xor <reg>, -1" (or, in this case, a constant
-// -1 vector).
-
-class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (and (vectype VECREG:$rA),
- (vnot_frag (vectype VECREG:$rB))))]>;
-
-class ANDCRegInst<RegisterClass rclass>:
- ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass AndComplement
-{
- def v16i8: ANDCVecInst<v16i8>;
- def v8i16: ANDCVecInst<v8i16>;
- def v4i32: ANDCVecInst<v4i32>;
- def v2i64: ANDCVecInst<v2i64>;
-
- def r128: ANDCRegInst<GPRC>;
- def r64: ANDCRegInst<R64C>;
- def r32: ANDCRegInst<R32C>;
- def r16: ANDCRegInst<R16C>;
- def r8: ANDCRegInst<R8C>;
-
- // Sometimes, the xor pattern has a bitcast constant:
- def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
-}
-
-defm ANDC : AndComplement;
-
-class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndByteImm
-{
- def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT),
- (and (v16i8 VECREG:$rA),
- (v16i8 v16i8U8Imm:$val)))]>;
-
- def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
-}
-
-defm ANDBI : AndByteImm;
-
-class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndHalfwordImm
-{
- def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
- def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
-
- // Zero-extend i8 to i16:
- def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
- [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
-}
-
-defm ANDHI : AndHalfwordImm;
-
-class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass AndWordImm
-{
- def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
-
- def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
- // pattern below.
- def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
- // zext 16->32 pattern below.
- //
- // Note that this pattern is somewhat artificial, since it might match
- // something the compiler generates but is unlikely to occur in practice.
- def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
-}
-
-defm ANDI : AndWordImm;
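The hacked i8i32/i16i32 forms above lean on the identity that zero extension
is just a mask, so an AND with an immediate can absorb the zext. In C terms:

    #include <stdint.h>

    /* zext i8 -> i32 is a mask: (uint32_t)(uint8_t)r == (r & 0xFF). */
    static uint32_t zext8to32(uint32_t r) {
        return r & 0xFFu;
    }
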
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Bitwise OR group:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Bitwise "or" (N.B.: These are also register-register copy instructions...)
-class ORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORVecInst<ValueType vectype>:
- ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ORRegInst<RegisterClass rclass>:
- ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
-
-
-multiclass BitwiseOr
-{
- def v16i8: ORVecInst<v16i8>;
- def v8i16: ORVecInst<v8i16>;
- def v4i32: ORVecInst<v4i32>;
- def v2i64: ORVecInst<v2i64>;
-
- def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4f32 VECREG:$rT),
- (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))))]>;
-
- def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v2f64 VECREG:$rT),
- (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)))))]>;
-
- def r128: ORRegInst<GPRC>;
- def r64: ORRegInst<R64C>;
- def r32: ORRegInst<R32C>;
- def r16: ORRegInst<R16C>;
- def r8: ORRegInst<R8C>;
-
- // OR instructions used to copy f32 and f64 registers.
- def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [/* no pattern */]>;
-
- def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- [/* no pattern */]>;
-}
-
-defm OR : BitwiseOr;
-
-//===----------------------------------------------------------------------===//
-// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
- (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
-
-def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
- (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
-
-def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
-
-def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
-
-def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
-
-def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
-
-def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
- (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
-
-def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
- (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
-
-def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
-
-def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
-
-def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
-
-def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
-
-// Load Register: This is an assembler alias for a bitwise OR of a register
-// against itself. It's here because it brings some clarity to assembly
-// language output.
-
-let hasCtrlDep = 1 in {
- class LRInst<dag OOL, dag IOL>
- : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = [/*no pattern*/];
-
- let Inst{0-10} = 0b10000010000; /* It's an OR operation */
- let Inst{11-17} = RA;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
- }
-
- class LRVecInst<ValueType vectype>:
- LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
- class LRRegInst<RegisterClass rclass>:
- LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
-
- multiclass LoadRegister {
- def v2i64: LRVecInst<v2i64>;
- def v2f64: LRVecInst<v2f64>;
- def v4i32: LRVecInst<v4i32>;
- def v4f32: LRVecInst<v4f32>;
- def v8i16: LRVecInst<v8i16>;
- def v16i8: LRVecInst<v16i8>;
-
- def r128: LRRegInst<GPRC>;
- def r64: LRRegInst<R64C>;
- def f64: LRRegInst<R64FP>;
- def r32: LRRegInst<R32C>;
- def f32: LRRegInst<R32FP>;
- def r16: LRRegInst<R16C>;
- def r8: LRRegInst<R8C>;
- }
-
- defm LR: LoadRegister;
-}
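So, concretely, "lr rt, ra" assembles to exactly the same encoding as
"or rt, ra, ra": as the Inst{11-17} and Inst{18-24} assignments above show,
the single source register is emitted into both source operand fields.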
-
-// ORC: Bitwise "or" with complement (c = a | ~b)
-
-class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORCVecInst<ValueType vectype>:
- ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vnot (vectype VECREG:$rB))))]>;
-
-class ORCRegInst<RegisterClass rclass>:
- ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass BitwiseOrComplement
-{
- def v16i8: ORCVecInst<v16i8>;
- def v8i16: ORCVecInst<v8i16>;
- def v4i32: ORCVecInst<v4i32>;
- def v2i64: ORCVecInst<v2i64>;
-
- def r128: ORCRegInst<GPRC>;
- def r64: ORCRegInst<R64C>;
- def r32: ORCRegInst<R32C>;
- def r16: ORCRegInst<R16C>;
- def r8: ORCRegInst<R8C>;
-}
-
-defm ORC : BitwiseOrComplement;
-
-// OR byte immediate
-class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
- ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
-           [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
-                                           (vectype immpred:$val)))]>;
-
-multiclass BitwiseOrByteImm
-{
- def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
-
- def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
-}
-
-defm ORBI : BitwiseOrByteImm;
-
-// OR halfword immediate
-class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
- ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-multiclass BitwiseOrHalfwordImm
-{
- def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
-
- def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
-
- // Specialized ORHI form used to promote 8-bit registers to 16-bit
- def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
- [(set R16C:$rT, (or (anyext R8C:$rA),
- i16ImmSExt10:$val))]>;
-}
-
-defm ORHI : BitwiseOrHalfwordImm;
-
-class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORIVecInst<ValueType vectype, PatLeaf immpred>:
- ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-// Bitwise "or" with immediate
-multiclass BitwiseOrImm
-{
- def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
-
- def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
-
-  // i16i32: hacked version of the ORI instruction to extend 16-bit quantities
-  // to 32-bit quantities. Used exclusively to match "anyext" conversions
-  // (see the "anyext 16->32" pattern below).
- def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R16C:$rA),
- i32ImmSExt10:$val))]>;
-
-  // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
-  // to 32-bit quantities. Used exclusively to match "anyext" conversions
-  // (see the "anyext 8->32" pattern below).
- def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R8C:$rA),
- i32ImmSExt10:$val))]>;
-}
-
-defm ORI : BitwiseOrImm;
-
-// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
-// result in $rT[0]; slots 1-3 are zeroed.
-//
-// FIXME: Needs to match an intrinsic pattern.
-def ORXv4i32:
- RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "orx\t$rT, $rA, $rB", IntegerOp,
- []>;
-
-// XOR:
-
-class XORInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class XORVecInst<ValueType vectype>:
- XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class XORRegInst<RegisterClass rclass>:
- XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseExclusiveOr
-{
- def v16i8: XORVecInst<v16i8>;
- def v8i16: XORVecInst<v8i16>;
- def v4i32: XORVecInst<v4i32>;
- def v2i64: XORVecInst<v2i64>;
-
- def r128: XORRegInst<GPRC>;
- def r64: XORRegInst<R64C>;
- def r32: XORRegInst<R32C>;
- def r16: XORRegInst<R16C>;
- def r8: XORRegInst<R8C>;
-
- // XOR instructions used to negate f32 and f64 quantities.
-
- def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* no pattern */]>;
-
- def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* no pattern */]>;
-
- def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern, see fneg{32,64} */]>;
-}
-
-defm XOR : BitwiseExclusiveOr;
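The fneg32/fneg64/fnegvec forms use the same bit-pun trick as the fabs forms
of AND above, except the sign bit is flipped rather than cleared. A minimal
C sketch:

    #include <stdint.h>
    #include <string.h>

    /* fneg via XOR: flip the IEEE-754 sign bit. */
    static float fneg_via_xor(float f) {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        bits ^= 0x80000000u;
        memcpy(&f, &bits, sizeof f);
        return f;
    }
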
-
-//==----------------------------------------------------------
-
-class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass XorByteImm
-{
- def v16i8:
- XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
-
- def r8:
- XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
-}
-
-defm XORBI : XorByteImm;
-
-def XORHIv8i16:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def XORHIr16:
- RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
-
-def XORIv4i32:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
- v4i32SExt10Imm:$val))]>;
-
-def XORIr32:
- RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
-
-// NAND:
-
-class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NANDVecInst<ValueType vectype>:
- NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NANDRegInst<RegisterClass rclass>:
- NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNand
-{
- def v16i8: NANDVecInst<v16i8>;
- def v8i16: NANDVecInst<v8i16>;
- def v4i32: NANDVecInst<v4i32>;
- def v2i64: NANDVecInst<v2i64>;
-
- def r128: NANDRegInst<GPRC>;
- def r64: NANDRegInst<R64C>;
- def r32: NANDRegInst<R32C>;
- def r16: NANDRegInst<R16C>;
- def r8: NANDRegInst<R8C>;
-}
-
-defm NAND : BitwiseNand;
-
-// NOR:
-
-class NORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NORVecInst<ValueType vectype>:
- NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NORRegInst<RegisterClass rclass>:
- NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNor
-{
- def v16i8: NORVecInst<v16i8>;
- def v8i16: NORVecInst<v8i16>;
- def v4i32: NORVecInst<v4i32>;
- def v2i64: NORVecInst<v2i64>;
-
- def r128: NORRegInst<GPRC>;
- def r64: NORRegInst<R64C>;
- def r32: NORRegInst<R32C>;
- def r16: NORRegInst<R16C>;
- def r8: NORRegInst<R8C>;
-}
-
-defm NOR : BitwiseNor;
-
-// Select bits:
-class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
- IntegerOp, pattern>;
-
-class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
- (and (vnot_frag (vectype VECREG:$rC)),
- (vectype VECREG:$rA))))]>;
-
-class SELBVecVCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (select (vectype VECREG:$rC),
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBVecCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
- [(set (vectype VECREG:$rT),
- (select R32C:$rC,
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBRegInst<RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
- [(set rclass:$rT,
- (or (and rclass:$rB, rclass:$rC),
- (and rclass:$rA, (not rclass:$rC))))]>;
-
-class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
- [(set rclass:$rT,
- (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
-
-multiclass SelectBits
-{
- def v16i8: SELBVecInst<v16i8>;
- def v8i16: SELBVecInst<v8i16>;
- def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
-
- def r128: SELBRegInst<GPRC>;
- def r64: SELBRegInst<R64C>;
- def r32: SELBRegInst<R32C>;
- def r16: SELBRegInst<R16C>;
- def r8: SELBRegInst<R8C>;
-
- def v16i8_cond: SELBVecCondInst<v16i8>;
- def v8i16_cond: SELBVecCondInst<v8i16>;
- def v4i32_cond: SELBVecCondInst<v4i32>;
- def v2i64_cond: SELBVecCondInst<v2i64>;
-
-  def v16i8_vcond: SELBVecVCondInst<v16i8>;
-  def v8i16_vcond: SELBVecVCondInst<v8i16>;
-  def v4i32_vcond: SELBVecVCondInst<v4i32>;
-  def v2i64_vcond: SELBVecVCondInst<v2i64>;
-
- def v4f32_cond:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4f32 VECREG:$rT),
- (select (v4i32 VECREG:$rC),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)))]>;
-
- // SELBr64_cond is defined in SPU64InstrInfo.td
- def r32_cond: SELBRegCondInst<R32C, R32C>;
- def f32_cond: SELBRegCondInst<R32C, R32FP>;
- def r16_cond: SELBRegCondInst<R16C, R16C>;
- def r8_cond: SELBRegCondInst<R8C, R8C>;
-}
-
-defm SELB : SelectBits;
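A scalar C model of what the selb patterns above express: each result bit is
taken from $rB where the corresponding control bit in $rC is 1, and from $rA
where it is 0.

    #include <stdint.h>

    /* selb: bitwise select, the (rC & rB) | (~rC & rA) form used in the
       SELBRegInst and SELBVecInst patterns above. */
    static uint32_t selb_model(uint32_t ra, uint32_t rb, uint32_t rc) {
        return (rc & rb) | (~rc & ra);
    }
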
-
-class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
- Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
- (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
-
-def : SPUselbPatVec<v16i8, SELBv16i8>;
-def : SPUselbPatVec<v8i16, SELBv8i16>;
-def : SPUselbPatVec<v4i32, SELBv4i32>;
-def : SPUselbPatVec<v2i64, SELBv2i64>;
-
-class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
- Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
- (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
-
-def : SPUselbPatReg<R8C, SELBr8>;
-def : SPUselbPatReg<R16C, SELBr16>;
-def : SPUselbPatReg<R32C, SELBr32>;
-def : SPUselbPatReg<R64C, SELBr64>;
-
-// EQV: Equivalence (1 for each same bit, otherwise 0)
-//
-// Note: There are a lot of ways to match this bit operator and these patterns
-// attempt to be as exhaustive as possible.
-
-class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class EQVVecInst<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (and (vnot (vectype VECREG:$rA)),
- (vnot (vectype VECREG:$rB)))))]>;
-
-class EQVRegInst<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
- (and (not rclass:$rA), (not rclass:$rB))))]>;
-
-class EQVVecPattern1<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern1<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
-
-class EQVVecPattern2<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
-
-class EQVRegPattern2<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (or (and rclass:$rA, rclass:$rB),
- (not (or rclass:$rA, rclass:$rB))))]>;
-
-class EQVVecPattern3<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern3<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitEquivalence
-{
- def v16i8: EQVVecInst<v16i8>;
- def v8i16: EQVVecInst<v8i16>;
- def v4i32: EQVVecInst<v4i32>;
- def v2i64: EQVVecInst<v2i64>;
-
- def v16i8_1: EQVVecPattern1<v16i8>;
- def v8i16_1: EQVVecPattern1<v8i16>;
- def v4i32_1: EQVVecPattern1<v4i32>;
- def v2i64_1: EQVVecPattern1<v2i64>;
-
- def v16i8_2: EQVVecPattern2<v16i8>;
- def v8i16_2: EQVVecPattern2<v8i16>;
- def v4i32_2: EQVVecPattern2<v4i32>;
- def v2i64_2: EQVVecPattern2<v2i64>;
-
- def v16i8_3: EQVVecPattern3<v16i8>;
- def v8i16_3: EQVVecPattern3<v8i16>;
- def v4i32_3: EQVVecPattern3<v4i32>;
- def v2i64_3: EQVVecPattern3<v2i64>;
-
- def r128: EQVRegInst<GPRC>;
- def r64: EQVRegInst<R64C>;
- def r32: EQVRegInst<R32C>;
- def r16: EQVRegInst<R16C>;
- def r8: EQVRegInst<R8C>;
-
- def r128_1: EQVRegPattern1<GPRC>;
- def r64_1: EQVRegPattern1<R64C>;
- def r32_1: EQVRegPattern1<R32C>;
- def r16_1: EQVRegPattern1<R16C>;
- def r8_1: EQVRegPattern1<R8C>;
-
- def r128_2: EQVRegPattern2<GPRC>;
- def r64_2: EQVRegPattern2<R64C>;
- def r32_2: EQVRegPattern2<R32C>;
- def r16_2: EQVRegPattern2<R16C>;
- def r8_2: EQVRegPattern2<R8C>;
-
- def r128_3: EQVRegPattern3<GPRC>;
- def r64_3: EQVRegPattern3<R64C>;
- def r32_3: EQVRegPattern3<R32C>;
- def r16_3: EQVRegPattern3<R16C>;
- def r8_3: EQVRegPattern3<R8C>;
-}
-
-defm EQV: BitEquivalence;
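All of the pattern shapes above compute the same function, bitwise XNOR; a
C model with the equivalences spelled out:

    #include <stdint.h>
    #include <assert.h>

    /* eqv: 1 where the operand bits agree, 0 where they differ. */
    static uint32_t eqv_model(uint32_t a, uint32_t b) {
        return ~(a ^ b);
    }

    static void eqv_forms_agree(uint32_t a, uint32_t b) {
        assert(eqv_model(a, b) == ((a & b) | (~a & ~b)));  /* EQVRegInst     */
        assert(eqv_model(a, b) == (a ^ ~b));               /* EQVRegPattern1 */
        assert(eqv_model(a, b) == ((a & b) | ~(a | b)));   /* EQVRegPattern2 */
    }
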
-
-//===----------------------------------------------------------------------===//
-// Vector shuffle...
-//===----------------------------------------------------------------------===//
-// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
-// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
-// matcher to emit SHUFB whenever LowerVECTOR_SHUFFLE generates a node with
-// the SPUISD::SHUFB opcode.
-//===----------------------------------------------------------------------===//
-
-class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
- ShuffleOp, pattern>;
-
-class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
- SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (resultvec VECREG:$rT),
- (SPUshuffle (resultvec VECREG:$rA),
- (resultvec VECREG:$rB),
- (maskvec VECREG:$rC)))]>;
-
-class SHUFBGPRCInst:
- SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-multiclass ShuffleBytes
-{
- def v16i8 : SHUFBVecInst<v16i8, v16i8>;
- def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
- def v8i16 : SHUFBVecInst<v8i16, v16i8>;
- def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
- def v4i32 : SHUFBVecInst<v4i32, v16i8>;
- def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
- def v2i64 : SHUFBVecInst<v2i64, v16i8>;
- def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
-
- def v4f32 : SHUFBVecInst<v4f32, v16i8>;
- def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
-
- def v2f64 : SHUFBVecInst<v2f64, v16i8>;
- def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
-
- def gprc : SHUFBGPRCInst;
-}
-
-defm SHUFB : ShuffleBytes;
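A rough C model of the shufb byte permute these patterns rely on: each result
byte selects one byte of the 32-byte concatenation of $rA and $rB, indexed by
the low five bits of the control byte. (The special control-byte encodings
that produce constant 0x00/0xFF/0x80 bytes on the real hardware are
deliberately omitted from this sketch.)

    #include <stdint.h>

    /* shufb, ignoring the special selector encodings: result byte i is
       byte (rc[i] & 0x1F) of rA ++ rB. */
    static void shufb_model(uint8_t rt[16], const uint8_t ra[16],
                            const uint8_t rb[16], const uint8_t rc[16]) {
        for (int i = 0; i < 16; i++) {
            uint8_t sel = rc[i] & 0x1Fu;
            rt[i] = (sel < 16) ? ra[sel] : rb[sel - 16];
        }
    }
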
-
-//===----------------------------------------------------------------------===//
-// Shift and rotate group:
-//===----------------------------------------------------------------------===//
-
-class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class SHLHVecInst<ValueType vectype>:
- SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass ShiftLeftHalfword
-{
- def v8i16: SHLHVecInst<v8i16>;
- def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
- def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
-}
-
-defm SHLH : ShiftLeftHalfword;
-
-//===----------------------------------------------------------------------===//
-
-class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class SHLHIVecInst<ValueType vectype>:
- SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
-
-multiclass ShiftLeftHalfwordImm
-{
- def v8i16: SHLHIVecInst<v8i16>;
- def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
-}
-
-defm SHLHI : ShiftLeftHalfwordImm;
-
-def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
- (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
-
-def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
- (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
-
-//===----------------------------------------------------------------------===//
-
-class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWord
-{
- def v4i32:
- SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
- def r32:
- SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
-}
-
-defm SHL: ShiftLeftWord;
-
-//===----------------------------------------------------------------------===//
-
-class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWordImm
-{
- def v4i32:
- SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
-
- def r32:
- SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
- [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLI : ShiftLeftWordImm;
-
-//===----------------------------------------------------------------------===//
-// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
-// register) to the left. Vector form is here to ensure type correctness.
-//
-// The shift count is in the lowest 3 bits (29-31) of $rB, so only shifts of
-// 0-7 bits are actually possible.
-//
-// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
-// to shift i64 and i128: SHLQBI handles the residual bits left over after
-// shifting by whole bytes with SHLQBY (see the sketch after these
-// definitions).
-
-class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBIVecInst<ValueType vectype>:
- SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
-
-class SHLQBIRegInst<RegisterClass rclass>:
- SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadByBits
-{
- def v16i8: SHLQBIVecInst<v16i8>;
- def v8i16: SHLQBIVecInst<v8i16>;
- def v4i32: SHLQBIVecInst<v4i32>;
- def v4f32: SHLQBIVecInst<v4f32>;
- def v2i64: SHLQBIVecInst<v2i64>;
- def v2f64: SHLQBIVecInst<v2f64>;
-
- def r128: SHLQBIRegInst<GPRC>;
-}
-
-defm SHLQBI : ShiftLeftQuadByBits;
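The byte/bit split described in the comment above, shown on a 64-bit value for
brevity (the real sequence operates on the whole 128-bit register):

    #include <stdint.h>

    /* Shift left by n (0 <= n < 64), decomposed the way SHLQBY and SHLQBI
       are paired: whole bytes first, then the residual 0-7 bits. */
    static uint64_t shl_bytes_then_bits(uint64_t x, unsigned n) {
        x <<= (n / 8) * 8;   /* shlqby: shift by whole bytes */
        x <<= n % 8;         /* shlqbi: shift the residual bits */
        return x;
    }
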
-
-// See the note above on SHLQBI. In this case, the predicate actually does the
-// enforcement, whereas with SHLQBI we have to take it on faith.
-class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBIIVecInst<ValueType vectype>:
- SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
-
-multiclass ShiftLeftQuadByBitsImm
-{
- def v16i8 : SHLQBIIVecInst<v16i8>;
- def v8i16 : SHLQBIIVecInst<v8i16>;
- def v4i32 : SHLQBIIVecInst<v4i32>;
- def v4f32 : SHLQBIIVecInst<v4f32>;
- def v2i64 : SHLQBIIVecInst<v2i64>;
- def v2f64 : SHLQBIIVecInst<v2f64>;
-}
-
-defm SHLQBII : ShiftLeftQuadByBitsImm;
-
-// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
-// not by bits. See notes above on SHLQBI.
-
-class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYVecInst<ValueType vectype>:
- SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
-
-multiclass ShiftLeftQuadBytes
-{
- def v16i8: SHLQBYVecInst<v16i8>;
- def v8i16: SHLQBYVecInst<v8i16>;
- def v4i32: SHLQBYVecInst<v4i32>;
- def v4f32: SHLQBYVecInst<v4f32>;
- def v2i64: SHLQBYVecInst<v2i64>;
- def v2f64: SHLQBYVecInst<v2f64>;
- def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
-}
-
-defm SHLQBY: ShiftLeftQuadBytes;
-
-class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBYIVecInst<ValueType vectype>:
- SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
-
-multiclass ShiftLeftQuadBytesImm
-{
- def v16i8: SHLQBYIVecInst<v16i8>;
- def v8i16: SHLQBYIVecInst<v8i16>;
- def v4i32: SHLQBYIVecInst<v4i32>;
- def v4f32: SHLQBYIVecInst<v4f32>;
- def v2i64: SHLQBYIVecInst<v2i64>;
- def v2f64: SHLQBYIVecInst<v2f64>;
- def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
- [(set GPRC:$rT,
- (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLQBYI : ShiftLeftQuadBytesImm;
-
-class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYBIVecInst<ValueType vectype>:
- SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class SHLQBYBIRegInst<RegisterClass rclass>:
- SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadBytesBitCount
-{
- def v16i8: SHLQBYBIVecInst<v16i8>;
- def v8i16: SHLQBYBIVecInst<v8i16>;
- def v4i32: SHLQBYBIVecInst<v4i32>;
- def v4f32: SHLQBYBIVecInst<v4f32>;
- def v2i64: SHLQBYBIVecInst<v2i64>;
- def v2f64: SHLQBYBIVecInst<v2f64>;
-
- def r128: SHLQBYBIRegInst<GPRC>;
-}
-
-defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTHVecInst<ValueType vectype>:
- ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
-
-class ROTHRegInst<RegisterClass rclass>:
- ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
-
-multiclass RotateLeftHalfword
-{
- def v8i16: ROTHVecInst<v8i16>;
- def r16: ROTHRegInst<R16C>;
-}
-
-defm ROTH: RotateLeftHalfword;
-
-def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword, immediate:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTHIVecInst<ValueType vectype>:
- ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
-
-multiclass RotateLeftHalfwordImm
-{
- def v8i16: ROTHIVecInst<v8i16>;
- def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
- def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm ROTHI: RotateLeftHalfwordImm;
-
-def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
-          (ROTHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTVecInst<ValueType vectype>:
- ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
-
-class ROTRegInst<RegisterClass rclass>:
- ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [(set rclass:$rT,
- (rotl rclass:$rA, R32C:$rB))]>;
-
-multiclass RotateLeftWord
-{
- def v4i32: ROTVecInst<v4i32>;
- def r32: ROTRegInst<R32C>;
-}
-
-defm ROT: RotateLeftWord;
-
-// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
-// 32-bit register
-def ROTr32_r16_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def ROTr32_r8_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
-
-class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
-
-multiclass RotateLeftWordImm
-{
- def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
- def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
-
- def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
- def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
- def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
-}
-
-defm ROTI : RotateLeftWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBYGenInst<ValueType type, RegisterClass rc>:
- ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
- [(set (type rc:$rT),
- (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
-
-class ROTQBYVecInst<ValueType type>:
- ROTQBYGenInst<type, VECREG>;
-
-multiclass RotateQuadLeftByBytes
-{
- def v16i8: ROTQBYVecInst<v16i8>;
- def v8i16: ROTQBYVecInst<v8i16>;
- def v4i32: ROTQBYVecInst<v4i32>;
- def v4f32: ROTQBYVecInst<v4f32>;
- def v2i64: ROTQBYVecInst<v2i64>;
- def v2f64: ROTQBYVecInst<v2f64>;
- def i128: ROTQBYGenInst<i128, GPRC>;
-}
-
-defm ROTQBY: RotateQuadLeftByBytes;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count), immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
- ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
- [(set (type rclass:$rT),
- (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
-
-class ROTQBYIVecInst<ValueType vectype>:
- ROTQBYIGenInst<vectype, VECREG>;
-
-multiclass RotateQuadByBytesImm
-{
- def v16i8: ROTQBYIVecInst<v16i8>;
- def v8i16: ROTQBYIVecInst<v8i16>;
- def v4i32: ROTQBYIVecInst<v4i32>;
- def v4f32: ROTQBYIVecInst<v4f32>;
- def v2i64: ROTQBYIVecInst<v2i64>;
- def vfi64: ROTQBYIVecInst<v2f64>;
- def i128: ROTQBYIGenInst<i128, GPRC>;
-}
-
-defm ROTQBYI: RotateQuadByBytesImm;
-
-// See ROTQBY note above.
-class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00110011100, OOL, IOL,
- "rotqbybi\t$rT, $rA, $shift",
- RotShiftQuad, pattern>;
-
-class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
- ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
-
-multiclass RotateQuadByBytesByBitshift {
- def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
- def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
- def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
- def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
-}
-
-defm ROTQBYBI : RotateQuadByBytesByBitshift;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// See ROTQBY note above.
-//
-// Assume that the user of this instruction knows to shift the rotate count
-// into bit 29
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBIVecInst<ValueType vectype>:
- ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-class ROTQBIRegInst<RegisterClass rclass>:
- ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCount
-{
- def v16i8: ROTQBIVecInst<v16i8>;
- def v8i16: ROTQBIVecInst<v8i16>;
- def v4i32: ROTQBIVecInst<v4i32>;
- def v2i64: ROTQBIVecInst<v2i64>;
-
- def r128: ROTQBIRegInst<GPRC>;
- def r64: ROTQBIRegInst<R64C>;
-}
-
-defm ROTQBI: RotateQuadByBitCount;
-
-class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCountImm
-{
- def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
- def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
- def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
-
- def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
- def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
-}
-
-defm ROTQBII : RotateQuadByBitCountImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTHM v8i16 form:
-// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
-// so this only matches a synthetically generated/lowered code
-// fragment.
-// NOTE(2): $rB must be negated before the right rotate!
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTHMv8i16:
- ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-// ROTHM r16 form: rotate a 16-bit quantity to the right, zero-filling from
-// the left. Note: this instruction doesn't match a pattern because $rB must
-// be negated for the instruction to work; hence the patterns below it.
-
-def ROTHMr16:
- ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated! */]>;
-
-def : Pat<(srl R16C:$rA, R32C:$rB),
- (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R16C:$rA, R16C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R16C:$rA, R8C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
-
-// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
-// that the immediate can be complemented, so that the user doesn't have to
-// worry about it.
-
-class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-def ROTHMIv8i16:
- ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def ROTHMIr16:
- ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, uimm7:$val)>;
-
-def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTM v4i32 form: See the ROTHM v8i16 comments.
-class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTMv4i32:
- ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
-
-def ROTMr32:
- ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(srl R32C:$rA, R32C:$rB),
- (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R32C:$rA, R16C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R32C:$rA, R8C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
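What the (SFIr32 $rB, 0) in the patterns above is for: the rotate-and-mask
instructions take the negated shift count, so a logical right shift by n is
emitted as rotm with 0 - n. A C model, assuming the hardware negates and
masks the count to six bits as the patterns imply:

    #include <stdint.h>

    /* Model of rotm: shift amount is (0 - count) & 0x3F (an assumption
       matching the negation above); amounts of 32 or more yield zero. */
    static uint32_t rotm_model(uint32_t ra, uint32_t count) {
        uint32_t shift = (0u - count) & 0x3Fu;
        return (shift < 32) ? (ra >> shift) : 0;
    }

    /* srl a, n (0 <= n < 32) is thus emitted as rotm a, (0 - n),
       mirroring (ROTMr32 $rA, (SFIr32 $rB, 0)). */
    static uint32_t srl_via_rotm(uint32_t a, uint32_t n) {
        return rotm_model(a, 0u - n);
    }
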
-// ROTMI v4i32 form: See the comment for ROTHM v8i16.
-def ROTMIv4i32:
- RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set (v4i32 VECREG:$rT),
- (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTMI r32 form: the operand knows how to complement the immediate value,
-// so the user doesn't have to.
-def ROTMIr32:
- RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(srl R32C:$rA, (i16 uimm7:$val)),
-          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(srl R32C:$rA, (i8 uimm7:$val)),
-          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTQMBY: The vector form exists merely so that type checking succeeds when
-// it is used in an instruction pattern. This instruction assumes that the
-// user knows to negate $rB.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYVecInst<ValueType vectype>:
- ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern, $rB must be negated */]>;
-
-class ROTQMBYRegInst<RegisterClass rclass>:
- ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytes
-{
- def v16i8: ROTQMBYVecInst<v16i8>;
- def v8i16: ROTQMBYVecInst<v8i16>;
- def v4i32: ROTQMBYVecInst<v4i32>;
- def v2i64: ROTQMBYVecInst<v2i64>;
-
- def r128: ROTQMBYRegInst<GPRC>;
- def r64: ROTQMBYRegInst<R64C>;
-}
-
-defm ROTQMBY : RotateQuadBytes;
-
-def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
- (ROTQMBYr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0))>;
-
-class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBYIVecInst<ValueType vectype>:
- ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-// 128-bit zero extension form:
-class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
- ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytesImm
-{
- def v16i8: ROTQMBYIVecInst<v16i8>;
- def v8i16: ROTQMBYIVecInst<v8i16>;
- def v4i32: ROTQMBYIVecInst<v4i32>;
- def v2i64: ROTQMBYIVecInst<v2i64>;
-
- def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
- def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
-
- def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
- def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
- def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
- def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
-}
-
-defm ROTQMBYI : RotateQuadBytesImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate and mask quadword by bytes from bit count
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYBIVecInst<ValueType vectype>:
- ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBitCount
-{
- def v16i8: ROTQMBYBIVecInst<v16i8>;
- def v8i16: ROTQMBYBIVecInst<v8i16>;
- def v4i32: ROTQMBYBIVecInst<v4i32>;
- def v2i64: ROTQMBYBIVecInst<v2i64>;
- def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [/*no pattern*/]>;
-}
-
-defm ROTQMBYBI: RotateMaskQuadByBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits
-// Note that the rotate amount has to be negated
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBIVecInst<ValueType vectype>:
- ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class ROTQMBIRegInst<RegisterClass rclass>:
- ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBits
-{
- def v16i8: ROTQMBIVecInst<v16i8>;
- def v8i16: ROTQMBIVecInst<v8i16>;
- def v4i32: ROTQMBIVecInst<v4i32>;
- def v2i64: ROTQMBIVecInst<v2i64>;
-
- def r128: ROTQMBIRegInst<GPRC>;
- def r64: ROTQMBIRegInst<R64C>;
-}
-
-defm ROTQMBI: RotateMaskQuadByBits;
-
-def : Pat<(srl GPRC:$rA, R32C:$rB),
- (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0)),
- (SFIr32 R32C:$rB, 0))>;
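-
-// Sketch of the composition above: rotqmbi consumes the low bits of the
-// (negated) count and shifts by 0-7 bits, while rotqmbybi interprets the
-// same value as a bit count and shifts by the corresponding number of
-// whole bytes; feeding one negated amount to both therefore synthesizes a
-// full 128-bit logical shift right.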
-
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBIIVecInst<ValueType vectype>:
- ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBIIRegInst<RegisterClass rclass>:
- ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBitsImm
-{
- def v16i8: ROTQMBIIVecInst<v16i8>;
- def v8i16: ROTQMBIIVecInst<v8i16>;
- def v4i32: ROTQMBIIVecInst<v4i32>;
- def v2i64: ROTQMBIIVecInst<v2i64>;
-
- def r128: ROTQMBIIRegInst<GPRC>;
- def r64: ROTQMBIIRegInst<R64C>;
-}
-
-defm ROTQMBII: RotateMaskQuadByBitsImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate and mask algebraic (arithmetic shift right) forms
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def ROTMAHv8i16:
- RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-def ROTMAHr16:
- RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R16C:$rA, R32C:$rB),
- (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R16C:$rA, R16C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R16C:$rA, R8C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-def ROTMAHIv8i16:
- RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set (v8i16 VECREG:$rT),
- (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def ROTMAHIr16:
- RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
-
-def : Pat<(sra R16C:$rA, (i32 uimm7:$val)),
- (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(sra R16C:$rA, (i8 uimm7:$val)),
- (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def ROTMAv4i32:
- RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
-
-def ROTMAr32:
- RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R32C:$rA, R32C:$rB),
- (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R32C:$rA, R16C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R32C:$rA, R8C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011110000, OOL, IOL,
- "rotmai\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
-
-class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
- [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
-
-multiclass RotateMaskAlgebraicImm {
- def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
- def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
- def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
- def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
-}
-
-defm ROTMAI : RotateMaskAlgebraicImm;
-
-//===----------------------------------------------------------------------===//
-// Branch and conditionals:
-//===----------------------------------------------------------------------===//
-
-let isTerminator = 1, isBarrier = 1 in {
- // Halt If Equal (r32 preferred slot only, no vector form)
- def HEQr32:
- RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
- "heq\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HEQIr32 :
- RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
- "heqi\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- // HGT/HGTI: These instructions use signed arithmetic for the comparison,
- // contrasting with HLGT/HLGTI, which use unsigned comparison:
- def HGTr32:
- RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
- "hgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HGTIr32:
- RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
- "hgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTr32:
- RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
- "hlgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTIr32:
- RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
- "hlgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-}
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Comparison operators for i8, i16 and i32:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualByte
-{
- def v16i8 :
- CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
- (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
-}
-
-class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualByteImm
-{
- def v16i8 :
- CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
-}
-
-class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfword
-{
- def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
-}
-
-class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfwordImm
-{
- def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (seteq (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualWord
-{
- def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
-}
-
-class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualWordImm
-{
- def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrByte
-{
- def v16i8 :
- CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
- (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
-}
-
-class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrByteImm
-{
- def v16i8 :
- CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfword
-{
- def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
-}
-
-class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfwordImm
-{
- def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setgt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrWord
-{
- def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
-}
-
-class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrWordImm
-{
- def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
-
- // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
- def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-}
-
-class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByte
-{
- def v16i8 :
- CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
- (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
-}
-
-class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByteImm
-{
- def v16i8 :
- CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfword
-{
- def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
-}
-
-class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfwordImm
-{
- def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setugt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWord
-{
- def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
-}
-
-class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWordImm
-{
- def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-defm CEQB : CmpEqualByte;
-defm CEQBI : CmpEqualByteImm;
-defm CEQH : CmpEqualHalfword;
-defm CEQHI : CmpEqualHalfwordImm;
-defm CEQ : CmpEqualWord;
-defm CEQI : CmpEqualWordImm;
-defm CGTB : CmpGtrByte;
-defm CGTBI : CmpGtrByteImm;
-defm CGTH : CmpGtrHalfword;
-defm CGTHI : CmpGtrHalfwordImm;
-defm CGT : CmpGtrWord;
-defm CGTI : CmpGtrWordImm;
-defm CLGTB : CmpLGtrByte;
-defm CLGTBI : CmpLGtrByteImm;
-defm CLGTH : CmpLGtrHalfword;
-defm CLGTHI : CmpLGtrHalfwordImm;
-defm CLGT : CmpLGtrWord;
-defm CLGTI : CmpLGtrWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// For SETCC primitives not supported above (setlt, setle, setge, etc.)
-// define a pattern to generate the right code, as a binary operator
-// (in a manner of speaking.)
-//
-// Notes:
-// 1. This only matches the setcc set of conditionals. Special pattern
-// matching is used for select conditionals.
-//
-// 2. The "DAG" versions of these classes is almost exclusively used for
-// i64 comparisons. See the tblgen fundamentals documentation for what
-// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
-// class for where ResultInstrs originates.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
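-// The Boolean identities used below, for reference:
-// setge a, b = (a > b) | (a == b) -- OR of two compares
-// setlt a, b = ~((a > b) | (a == b)) -- NOR of two compares
-// setle a, b = ~(a > b) -- XOR with all ones
-// setne a, b = ~(a == b) -- XOR with all ones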
-
-class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
-
-class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, (inttype immpred:$imm)),
- (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
-
-def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
-def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
-
-def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
-def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
-
-def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
-def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
-
-class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB))>;
-
-class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType immtype,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, (immtype immpred:$imm)),
- (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
- (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
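-
-// For reference, an instantiation such as
-// def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
-// is shorthand for
-// def : Pat<(setge R32C:$rA, R32C:$rB),
-// (ORr32 (CGTr32 R32C:$rA, R32C:$rB),
-// (CEQr32 R32C:$rA, R32C:$rB))>;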
-
-def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
-def : Pat<(setle R8C:$rA, R8C:$rB),
- (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setle R8C:$rA, immU8:$imm),
- (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
- ORr16, CGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
-def : Pat<(setle R16C:$rA, R16C:$rB),
- (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
- ORr32, CGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
-def : Pat<(setle R32C:$rA, R32C:$rB),
- (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
-
-def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
-def : Pat<(setule R8C:$rA, R8C:$rB),
- (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setule R8C:$rA, immU8:$imm),
- (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
- ORr16, CLGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
- CLGTHIr16, CEQHIr16>;
-def : Pat<(setule R16C:$rA, R16C:$rB),
- (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
- ORr32, CLGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
-def : Pat<(setule R32C:$rA, R32C:$rB),
- (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// select conditional patterns:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, rclass:$rB))>;
-
-class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, immpred:$imm))>;
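-
-// Note on operand order (sketch): selb takes its result from the second
-// source operand wherever the control mask is 1, so these negated
-// conditions need no explicit inversion -- the compare computes the
-// complement of the condition, and its all-ones result simply routes
-// the selection to $rFalse.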
-
-def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
-def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
-def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
-def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBIr8>;
-def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
-def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
-
-def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
-def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
-def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
-def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
-def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
-def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
-
-def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
-def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
-def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
-def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
-def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
-def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
-
-class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB)))>;
-
-class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
- (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
-
-def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
- SELBr8, ORr8, CGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
- SELBr16, ORr16, CGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
-def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
- SELBr32, ORr32, CGTIr32, CEQIr32>;
-
-def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
- SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
- SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
-def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
- SELBr32, ORr32, CLGTIr32, CEQIr32>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-let isCall = 1,
- // All calls clobber the non-callee-saved registers:
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
- R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
- R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
- R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
- R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
- R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
- R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
- R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
- // All of these instructions use $lr (aka $0)
- Uses = [R0] in {
- // Branch relative and set link: Used if we actually know that the target
- // is within [-32768, 32767] bytes of the call site
- def BRSL:
- BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func),
- "brsl\t$$lr, $func",
- [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
-
- // Branch absolute and set link: Used if we actually know that the target
- // is an absolute address
- def BRASL:
- BranchSetLink<0b011001100, (outs), (ins calltarget:$func),
- "brasl\t$$lr, $func",
- [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
-
- // Branch indirect and set link if external data. These instructions are
- // not generated directly; they are matched via an intrinsic:
- def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
-
- // Branch indirect and set link. This is the "X-form" address version of a
- // function call
- def BISL:
- BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
-}
-
-// Support calls to external symbols:
-def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
- (BRSL texternalsym:$func)>;
-
-def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
- (BRASL texternalsym:$func)>;
-
-// Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
- let isBarrier = 1 in {
- def BR :
- UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
- "br\t$dest",
- [(br bb:$dest)]>;
-
- // Unconditional, absolute address branch
- def BRA:
- UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
- "bra\t$dest",
- [/* no pattern */]>;
-
- // Indirect branch
- let isIndirectBranch = 1 in {
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
- }
- }
-
- // Conditional branches:
- class BRNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
- BranchResolv, pattern>;
-
- class BRNZRegInst<RegisterClass rclass>:
- BRNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRNZVecInst<ValueType vectype>:
- BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
- [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
-
- multiclass BranchNotZero {
- def v4i32 : BRNZVecInst<v4i32>;
- def r32 : BRNZRegInst<R32C>;
- }
-
- defm BRNZ : BranchNotZero;
-
- class BRZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
- BranchResolv, pattern>;
-
- class BRZRegInst<RegisterClass rclass>:
- BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRZVecInst<ValueType vectype>:
- BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZero {
- def v4i32: BRZVecInst<v4i32>;
- def r32: BRZRegInst<R32C>;
- }
-
- defm BRZ: BranchZero;
-
- // Note: LLVM doesn't generate conditional indirect branches. Otherwise
- // these would be useful:
- /*
- class BINZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
-
- class BINZRegInst<RegisterClass rclass>:
- BINZInst<(ins rclass:$rA, brtarget:$dest),
- [(brcond rclass:$rA, R32C:$dest)]>;
-
- class BINZVecInst<ValueType vectype>:
- BINZInst<(ins VECREG:$rA, R32C:$dest),
- [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
-
- multiclass BranchNotZeroIndirect {
- def v4i32: BINZVecInst<v4i32>;
- def r32: BINZRegInst<R32C>;
- }
-
- defm BINZ: BranchNotZeroIndirect;
-
- class BIZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
-
- class BIZRegInst<RegisterClass rclass>:
- BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
-
- class BIZVecInst<ValueType vectype>:
- BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
-
- multiclass BranchZeroIndirect {
- def v4i32: BIZVecInst<v4i32>;
- def r32: BIZRegInst<R32C>;
- }
-
- defm BIZ: BranchZeroIndirect;
- */
-
- class BRHNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
- pattern>;
-
- class BRHNZRegInst<RegisterClass rclass>:
- BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRHNZVecInst<ValueType vectype>:
- BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchNotZeroHalfword {
- def v8i16: BRHNZVecInst<v8i16>;
- def r16: BRHNZRegInst<R16C>;
- }
-
- defm BRHNZ: BranchNotZeroHalfword;
-
- class BRHZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
- pattern>;
-
- class BRHZRegInst<RegisterClass rclass>:
- BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRHZVecInst<ValueType vectype>:
- BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZeroHalfword {
- def v8i16: BRHZVecInst<v8i16>;
- def r16: BRHZRegInst<R16C>;
- }
-
- defm BRHZ: BranchZeroHalfword;
-}
-
-//===----------------------------------------------------------------------===//
-// setcc and brcond patterns:
-//===----------------------------------------------------------------------===//
-
-def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
- (BRHZr16 R16C:$rA, bb:$dest)>;
-def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
- (BRHNZr16 R16C:$rA, bb:$dest)>;
-
-def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
- (BRZr32 R32C:$rA, bb:$dest)>;
-def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
- (BRNZr32 R32C:$rA, bb:$dest)>;
-
-multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
-defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
-
-multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CLGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
-defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
-
-multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16C:$rB),
- (CEQHr16 R16C:$rA, R16C:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
-
-multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
-defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
-
-multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16C:$rB),
- (CEQHr16 R16C:$rA, R16C:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
-
-let isTerminator = 1, isBarrier = 1 in {
- let isReturn = 1 in {
- def RET:
- RETForm<"bi\t$$lr", [(retflag)]>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Single precision floating point instructions
-//===----------------------------------------------------------------------===//
-
-class FAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FAVecInst<ValueType vectype>:
- FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPAdd
-{
- def v4f32: FAVecInst<v4f32>;
- def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FA : SFPAdd;
-
-class FSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FSVecInst<ValueType vectype>:
- FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPSub
-{
- def v4f32: FSVecInst<v4f32>;
- def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FS : SFPSub;
-
-class FMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01100011010, OOL, IOL,
- "fm\t$rT, $rA, $rB", SPrecFP,
- pattern>;
-
-class FMVecInst<ValueType type>:
- FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (type VECREG:$rT),
- (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
-
-multiclass SFPMul
-{
- def v4f32: FMVecInst<v4f32>;
- def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FM : SFPMul;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Multiply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-//     = -(a * b - c)
-//     = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-
-// Floating point reciprocal estimate
-
-class FRESTInst<dag OOL, dag IOL>:
- RRForm_1<0b00110111000, OOL, IOL,
- "frest\t$rT, $rA", SPrecFP,
- [/* no pattern */]>;
-
-def FRESTv4f32 :
- FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
-def FRESTf32 :
- FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
-
-// Floating point interpolate (used in conjunction with reciprocal estimate)
-def FIv4f32 :
- RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-def FIf32 :
- RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-//--------------------------------------------------------------------------
-// Basic single precision floating point comparisons:
-//
-// Note: There is no support on SPU for single precision NaN. Consequently,
-// ordered and unordered comparisons are the same.
-//--------------------------------------------------------------------------
-
-def FCEQf32 :
- RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fceq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
- (FCEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMEQf32 :
- RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmeq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCGTf32 :
- RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setogt R32FP:$rA, R32FP:$rB),
- (FCGTf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMGTf32 :
- RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
-
-//--------------------------------------------------------------------------
-// Single precision floating point comparisons and SETCC equivalents:
-//--------------------------------------------------------------------------
-
-def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
-def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
-
-def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
-
-def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
-
-def : Pat<(setule R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-def : Pat<(setole R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-
-// FP Status and Control Register Write
-// Why isn't rT a don't care in the ISA?
-// Should we create a special RRForm_3 for this guy and zero out the rT?
-def FSCRWf32 :
- RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
- "fscrwr\t$rA", SPrecFP,
- [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
-
-// FP Status and Control Register Read
-def FSCRRf32 :
- RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
- "fscrrd\t$rT", SPrecFP,
- [/* This instruction requires an intrinsic */]>;
-
-// llvm instruction space
-// How do these map onto cell instructions?
-// fdiv rA rB
-// frest rC rB # c = 1/b (both lines)
-// fi rC rB rC
-// fm rD rA rC # d = a * 1/b
-// fnms rB rD rB rA # b = -(d * b - a) -- should == 0 in a perfect world
-// fma rB rB rC rD # b = b * c + d
-// = -(d * b - a) * c + d
-// = a * c - c * (a * b * c - a)
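-// In other words (a sketch of the refinement): frest/fi produce c ~= 1/b,
-// fm forms the raw quotient d = a * c, fnms computes the residual
-// r = a - d * b, and the final fma returns d + r * c, a Newton-Raphson
-// step that cancels most of the estimate's error.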
-
-// fcopysign (???)
-
-// Library calls:
-// These llvm instructions will actually map to library calls.
-// All that's needed, then, is to check that the appropriate library is
-// imported and do a brsl to the proper function name.
-// frem # fmod(x, y): x - (x/y) * y
-// (Note: fmod(double, double), fmodf(float, float))
-// fsqrt?
-// fsin?
-// fcos?
-// Unimplemented SPU instruction space
-// floating reciprocal absolute square root estimate (frsqest)
-
-// The following are probably just intrinsics
-// status and control register write
-// status and control register read
-
-//--------------------------------------
-// Floating Point Conversions
-// Signed conversions:
-def CSiFv4f32:
- CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
-
-// Convert signed integer to floating point
-def CSiFf32 :
- CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
-
-// Convert unsigned int to float
-def CUiFv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
-
-def CUiFf32 :
- CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
-
-// Convert float to unsigned int
-// Assume that scale = 0
-
-def CFUiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
-
-def CFUif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
-
-// Convert float to signed int
-// Assume that scale = 0
-
-def CFSiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
-
-def CFSif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
-
-//===----------------------------------------------------------------------==//
-// Single<->Double precision conversions
-//===----------------------------------------------------------------------==//
-
-// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
-// v4f32, output is v2f64--which goes in the name?)
-
-// Floating point extend single to double
-// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
-// operates on two double-word slots (i.e. 1st and 3rd fp numbers
-// are ignored).
-def FESDvec :
- RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
-
-def FESDf32 :
- RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [(set R64FP:$rT, (fextend R32FP:$rA))]>;
-
-// Floating point round double to single
-//def FRDSvec :
-// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
-// "frds\t$rT, $rA,", SPrecFP,
-// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
-
-def FRDSf64 :
- RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
- "frds\t$rT, $rA", SPrecFP,
- [(set R32FP:$rT, (fround R64FP:$rA))]>;
-
-// TODO: include anyextend?
-
-//===----------------------------------------------------------------------==//
-// Double precision floating point instructions
-//===----------------------------------------------------------------------==//
-def FAf64 :
- RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
-
-def FAv2f64 :
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FSf64 :
- RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
-
-def FSv2f64 :
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMf64 :
- RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
-
-def FMv2f64:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMAf64:
- RRForm<0b00111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMAv2f64:
- RRForm<0b00111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSf64 :
- RRForm<0b10111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSv2f64 :
- RRForm<0b10111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
- (v2f64 VECREG:$rC)))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-// DFNMS: -(a * b - c)
-//      = -(a * b) + c  =>  c - (a * b)
-
-class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
- DPrecFP, pattern>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-class DFNMSVecInst<list<dag> pattern>:
- DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- pattern>;
-
-class DFNMSRegInst<list<dag> pattern>:
- DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- pattern>;
-
-multiclass DFMultiplySubtract
-{
- def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB))))]>;
-
- def f64 : DFNMSRegInst<[(set R64FP:$rT,
- (fsub R64FP:$rC,
- (fmul R64FP:$rA, R64FP:$rB)))]>;
-}
-
-defm DFNMS : DFMultiplySubtract;
-
-// FNMA: -(a * b + c)
-//     = -(a * b) - c
-def FNMAf64 :
- RRForm<0b11111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FNMAv2f64 :
- RRForm<0b11111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fneg (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-//===----------------------------------------------------------------------==//
-// Floating point negation and absolute value
-//===----------------------------------------------------------------------==//
-
-def : Pat<(fneg (v4f32 VECREG:$rA)),
- (XORfnegvec (v4f32 VECREG:$rA),
- (v4f32 (ILHUv4i32 0x8000)))>;
-
-def : Pat<(fneg R32FP:$rA),
- (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
-
-// Floating point absolute value
-// Note: f64 fabs is custom-selected.
-
-def : Pat<(fabs R32FP:$rA),
- (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
-
-def : Pat<(fabs (v4f32 VECREG:$rA)),
- (ANDfabsvec (v4f32 VECREG:$rA),
- (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
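-
-// Both patterns build the usual IEEE-754 sign masks (sketch):
-// fneg: xor with 0x80000000, i.e. (ILHUr32 0x8000)
-// fabs: and with 0x7fffffff, i.e. (IOHLr32 (ILHUr32 0x7fff), 0xffff)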
-
-//===----------------------------------------------------------------------===//
-// Hint for branch instructions:
-//===----------------------------------------------------------------------===//
-def HBRA :
- HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
-
-//===----------------------------------------------------------------------===//
-// Execute and Load NOPs (execute NOPs belong in the even pipeline, load
-// NOPs in the odd pipeline)
-//===----------------------------------------------------------------------===//
-
-def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000010;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000000;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Bit conversions (type conversions between vector/packed types)
-// NOTE: Promotions are handled using the XS* instructions.
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
-
-def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
-
-def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
-
-def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
-
-def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-
-def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
- (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
- (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
- (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
- (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
- (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
- (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-
-def : Pat<(i32 (bitconvert R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
-
-def : Pat<(f32 (bitconvert R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
-
-def : Pat<(i64 (bitconvert R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
-
-def : Pat<(f64 (bitconvert R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
-
-
-//===----------------------------------------------------------------------===//
-// Instruction patterns:
-//===----------------------------------------------------------------------===//
-
-// General 32-bit constants:
-def : Pat<(i32 imm:$imm),
- (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
-
-// Single precision float constants:
-def : Pat<(f32 fpimm:$imm),
- (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
-
-// General constant 32-bit vectors
-def : Pat<(v4i32 v4i32Imm:$imm),
- (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
- (LO16_vec v4i32Imm:$imm))>;
-
-// 8-bit constants
-def : Pat<(i8 imm:$imm),
- (ILHr8 imm:$imm)>;
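
The 32-bit patterns here all lean on the same two-instruction idiom: ILHU loads a 16-bit value into the upper halfword (zeroing the lower), and IOHL ORs a 16-bit value into the lower halfword; HI16/LO16 are the transforms that split the constant. A minimal C++ sketch of the scalar semantics (helper names are illustrative, not SPU intrinsics or LLVM APIs):

    #include <cstdint>

    // ILHU: load halfword upper -- immediate goes to bits 16..31.
    uint32_t ilhu(uint16_t hi) { return uint32_t(hi) << 16; }

    // IOHL: OR halfword lower -- immediate is ORed into bits 0..15.
    uint32_t iohl(uint32_t r, uint16_t lo) { return r | lo; }

    // The (i32 imm) pattern above: HI16/LO16 split the constant.
    uint32_t materialize_i32(uint32_t imm) {
      return iohl(ilhu(uint16_t(imm >> 16)), uint16_t(imm & 0xffff));
    }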
-
-//===----------------------------------------------------------------------===//
-// Zero/Any/Sign extensions
-//===----------------------------------------------------------------------===//
-
-// sext 8->32: Sign extend bytes to words
-def : Pat<(sext_inreg R32C:$rSrc, i8),
- (XSHWr32 (XSBHr32 R32C:$rSrc))>;
-
-def : Pat<(i32 (sext R8C:$rSrc)),
- (XSHWr16 (XSBHr8 R8C:$rSrc))>;
-
-// sext 8->64: Sign extend bytes to double word
-def : Pat<(sext_inreg R64C:$rSrc, i8),
- (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
-
-def : Pat<(i64 (sext R8C:$rSrc)),
- (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
-
-// zext 8->16: Zero extend bytes to halfwords
-def : Pat<(i16 (zext R8C:$rSrc)),
- (ANDHIi8i16 R8C:$rSrc, 0xff)>;
-
-// zext 8->32: Zero extend bytes to words
-def : Pat<(i32 (zext R8C:$rSrc)),
- (ANDIi8i32 R8C:$rSrc, 0xff)>;
-
-// zext 8->64: Zero extend bytes to double words
-def : Pat<(i64 (zext R8C:$rSrc)),
- (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
- (COPY_TO_REGCLASS
- (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
- 0x4),
- (ILv4i32 0x0),
- (FSMBIv4i32 0x0f0f)), R64C)>;
-
-// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
-def : Pat<(i16 (anyext R8C:$rSrc)),
- (ORHIi8i16 R8C:$rSrc, 0)>;
-
-// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
-def : Pat<(i32 (anyext R8C:$rSrc)),
- (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
-
-// sext 16->64: Sign extend halfword to double word
-def : Pat<(sext_inreg R64C:$rSrc, i16),
- (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
-
-def : Pat<(sext R16C:$rSrc),
- (XSWDr64 (XSHWr16 R16C:$rSrc))>;
-
-// zext 16->32: Zero extend halfwords to words
-def : Pat<(i32 (zext R16C:$rSrc)),
- (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
- (ANDIi16i32 R16C:$rSrc, 0xf)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
- (ANDIi16i32 R16C:$rSrc, 0xff)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
- (ANDIi16i32 R16C:$rSrc, 0xfff)>;
-
-// anyext 16->32: Extend 16->32 bits, irrespective of sign
-def : Pat<(i32 (anyext R16C:$rSrc)),
- (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
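
The sign-extension patterns compose one widening step per instruction: XSBH extends bytes to halfwords, XSHW halfwords to words, XSWD words to doublewords, so sext i8->i64 chains all three, while zero extension just masks. A scalar C++ model of that composition (function names mirror the instructions purely for illustration):

    #include <cstdint>

    int16_t xsbh(int8_t b)  { return b; }  // extend sign of byte to halfword
    int32_t xshw(int16_t h) { return h; }  // extend sign of halfword to word
    int64_t xswd(int32_t w) { return w; }  // extend sign of word to doubleword

    // The (i64 (sext R8C)) pattern above chains all three steps.
    int64_t sext_i8_to_i64(int8_t b) { return xswd(xshw(xsbh(b))); }

    // The zext patterns mask instead, e.g. ANDIi8i32 with 0xff.
    uint32_t zext_i8_to_i32(uint8_t b) { return b & 0xffu; }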
-
-//===----------------------------------------------------------------------===//
-// Truncates:
-// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
-// above are custom lowered.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(i8 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
-
-def : Pat<(i8 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
-
-def : Pat<(i8 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i8 (trunc R16C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i16 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
-
-def : Pat<(i16 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
-
-def : Pat<(i16 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
-
-def : Pat<(i32 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
-
-def : Pat<(i32 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
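
All of these truncations go through SHUFB, whose control word is a per-byte index into the 32-byte concatenation of the two source registers; since the SPU register file is big-endian, masks like 0x0c0d0e0f select the four least-significant bytes of a quadword. A simplified C++ model (ignoring SHUFB's special control values such as 0x80, which produce constants):

    #include <cstdint>

    // Simplified SHUFB: each control byte picks one byte from the
    // concatenation of sources a (indices 0-15) and b (indices 16-31).
    void shufb(const uint8_t a[16], const uint8_t b[16],
               const uint8_t ctl[16], uint8_t out[16]) {
      for (int i = 0; i < 16; ++i) {
        uint8_t idx = ctl[i] & 0x1f;
        out[i] = (idx < 16) ? a[idx] : b[idx - 16];
      }
    }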
-
-//===----------------------------------------------------------------------===//
-// Address generation: SPU, like PPC, has to split addresses into high and
-// low parts in order to load them into a register.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
-def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
-def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
- (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
- (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
- (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
- (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-// Intrinsics:
-include "CellSDKIntrinsics.td"
-// Various math operator instruction sequences
-include "SPUMathInstr.td"
-// 64-bit "instructions"/support
-include "SPU64InstrInfo.td"
-// 128-bit "instructions"/support
-include "SPU128InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
deleted file mode 100644
index 399684bb0887..000000000000
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- SPUMachineFunction.h - Private data used for CellSPU ------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_MACHINE_FUNCTION_INFO_H
-#define SPU_MACHINE_FUNCTION_INFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// SPUFunctionInfo - Cell SPU target-specific information for each
-/// MachineFunction
-class SPUFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
- /// UsesLR - Indicates whether LR is used in the current function.
- ///
- bool UsesLR;
-
- // VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
-
-public:
- SPUFunctionInfo(MachineFunction& MF)
- : UsesLR(false),
- VarArgsFrameIndex(0)
- {}
-
- void setUsesLR(bool U) { UsesLR = U; }
- bool usesLR() { return UsesLR; }
-
- int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
- void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
-};
-
-} // end of namespace llvm
-
-
-#endif
-
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
deleted file mode 100644
index 9a5c3976afbe..000000000000
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUMathInstr.td - Cell SPU math operations --------*- tablegen -*--===//
-//
-// Cell SPU math operations
-//
-// This target description file contains instruction sequences for various
-// math operations, such as vector multiplies, i32 multiply, etc., for the
-// SPU's i32, i16, i8 and corresponding vector types.
-//
-// Any resemblance to libsimdmath or the Cell SDK simdmath library is
-// purely and completely coincidental.
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v16i8 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
- (ORv4i32
- (ANDv4i32
- (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
- (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)),
- (ILAv4i32 0x0000ffff)),
- (SHLIv4i32
- (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
- (ROTMAIv4i32_i32 VECREG:$rB, 16)),
- (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
- (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)), 16))>;
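
The SPU has no byte multiply, so this sequence synthesizes one from MPY (a 16x16 halfword multiply), rotates, and byte selects. What it ultimately computes is just an independent low-8-bit product per lane, which is easy to state as a C++ sketch:

    #include <cstdint>

    // Per-lane semantics of the v16i8 multiply pattern above: only
    // the low 8 bits of each product survive, which is why it can be
    // assembled from halfword multiplies on the even and odd bytes.
    void mul_v16i8(const uint8_t a[16], const uint8_t b[16], uint8_t out[16]) {
      for (int i = 0; i < 16; ++i)
        out[i] = uint8_t(a[i] * b[i]);
    }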
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v8i16 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
- (FSMBIv8i16 0xcccc))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def MPYv4i32:
- Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (Av4i32
- (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
- (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
- (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
-
-def MPYi32:
- Pat<(mul R32C:$rA, R32C:$rB),
- (Ar32
- (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
- (MPYHr32 R32C:$rB, R32C:$rA)),
- (MPYUr32 R32C:$rA, R32C:$rB))>;
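
The SPU multiplier is 16x16, so a full 32-bit product is assembled from partial products: MPYU gives the low-halfword product al*bl, and MPYH gives (ah*bl) << 16; summing MPYH both ways plus MPYU reproduces a*b modulo 2^32, since the ah*bh term falls off the top. A C++ sketch of the identity behind MPYi32 (helper names are illustrative):

    #include <cstdint>

    uint32_t mpyh(uint32_t a, uint32_t b) {   // (high16(a) * low16(b)) << 16
      return ((a >> 16) * (b & 0xffff)) << 16;
    }
    uint32_t mpyu(uint32_t a, uint32_t b) {   // low16(a) * low16(b)
      return (a & 0xffff) * (b & 0xffff);
    }

    // a*b == ((ah*bl + bh*al) << 16) + al*bl   (mod 2^32)
    uint32_t mul32(uint32_t a, uint32_t b) {
      return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
    }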
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f32, v4f32 divide instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Reciprocal estimate and interpolation
-def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
-// Division estimate
-def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
- Interpf32.Fragment,
- DivEstf32.Fragment)>;
-// Epsilon addition
-def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
-
-def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
- (SELBf32_cond NRaphf32.Fragment,
- Epsilonf32.Fragment,
- (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
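
The expansion follows the classic estimate-and-refine recipe: FREST produces a rough reciprocal of the divisor, FI sharpens it by interpolation, the quotient estimate is refined with one Newton-Raphson step (FNMS computes the residual a - q*b), and the AI/SELB tail nudges the result by one ulp when the refined quotient is still short. A scalar C++ sketch, with the FREST+FI estimate stood in by an exact reciprocal (an assumption; the real estimate is only accurate to roughly 12 bits). The v4f32 sequence that follows is the same recipe applied elementwise.

    #include <cmath>

    float frest_fi(float b) { return 1.0f / b; }  // placeholder estimate

    float fdiv_nr(float a, float b) {
      float y  = frest_fi(b);              // Interpf32: reciprocal estimate
      float q0 = a * y;                    // DivEstf32: initial quotient
      float r  = a - q0 * b;               // FNMSf32: residual
      return std::fma(r, y, q0);           // NRaphf32: one NR refinement
      // Epsilonf32 + SELBf32_cond then correct the final ulp.
    }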
-
-// Reciprocal estimate and interpolation
-def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
-// Division estimate
-def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)),
- Interpv4f32.Fragment,
- DivEstv4f32.Fragment)>;
-// Epsilon addition
-def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
-
-def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (SELBv4f32_cond NRaphv4f32.Fragment,
- Epsilonv4f32.Fragment,
- (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
- Epsilonv4f32.Fragment,
- (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
deleted file mode 100644
index a47e9ef0167c..000000000000
--- a/lib/Target/CellSPU/SPUNodes.td
+++ /dev/null
@@ -1,159 +0,0 @@
-//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Type profiles and SelectionDAG nodes used by CellSPU
-//
-//===----------------------------------------------------------------------===//
-
-// Type profile for a call sequence
-def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-
-// SPU_GenControl: Type profile for generating control words for insertions
-def SPU_GenControl : SDTypeProfile<1, 1, []>;
-def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
-//===----------------------------------------------------------------------===//
-// Operand constraints:
-//===----------------------------------------------------------------------===//
-
-def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-// Operand type constraints for vector shuffle/permute operations
-def SDT_SPUshuffle : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Vector binary operator type constraints (needs a further constraint to
-// ensure that operand 0 is a vector...):
-
-def SPUVecBinop: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Trinary operators, e.g., addx, carry generate
-def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
-]>;
-
-// SELECT_MASK type constraints: There are several variations for the various
-// vector types (this avoids having to bit_convert all over the place.)
-def SPUselmask_type: SDTypeProfile<1, 1, [
- SDTCisInt<1>
-]>;
-
-// SELB type constraints:
-def SPUselb_type: SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
-
-// SPU Vector shift pseudo-instruction type constraints
-def SPUvecshift_type: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
-
-// "marker" type for i64 operators that need a shuffle mask
-// (i.e., ones that use cg or bg or another instruction that needs
-// shufb to get things in the right place.)
-// Op0: The result
-// Op1, 2: LHS, RHS
-// Op3: Carry-generate shuffle mask
-
-def SPUmarker_type : SDTypeProfile<1, 3, [
- SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
-
-//===----------------------------------------------------------------------===//
-// Synthetic/pseudo-instructions
-//===----------------------------------------------------------------------===//
-
-// SPU CNTB:
-def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
-
-// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
-// SPUISelLowering.h):
-def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-
-// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
-def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
-def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
-def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
-
-def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
-def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
-
-// Vector rotate left, bits shifted out of the left are rotated in on the right
-def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
- SPUvecshift_type, []>;
-
-// Vector rotate left by bytes, but the count is given in bits and the SPU
-// internally converts it to bytes (saves an instruction to mask off lower
-// three bits)
-def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
- SPUvecshift_type>;
-
-// Shift entire quad left by bytes/bits. Zeros are shifted in on the right.
-// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128.
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
-def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
-
-// SPU form select mask for bytes, immediate
-def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
-
-// SPU select bits instruction
-def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
-
-def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
-def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
-
-def SPU_vec_demote : SDTypeProfile<1, 1, []>;
-def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
-
-// Address high and low components, used for [r+r] type addressing
-def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
-def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
-
-// PC-relative address
-def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
-
-// A-Form local store addresses
-def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
-
-// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
-def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
-
-// i64 markers: supply extra operands used to generate the i64 operator
-// instruction sequences
-def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
-def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
-def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
-
-//===----------------------------------------------------------------------===//
-// Constraints: (taken from PPCInstrInfo.td)
-//===----------------------------------------------------------------------===//
-
-class RegConstraint<string C> {
- string Constraints = C;
-}
-
-class NoEncode<string E> {
- string DisableEncoding = E;
-}
-
-//===----------------------------------------------------------------------===//
-// Return (flag isn't quite what it means: the operations are flagged so that
-// instruction scheduling doesn't disassociate them.)
-//===----------------------------------------------------------------------===//
-
-def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
deleted file mode 100644
index 7c58041e3b84..000000000000
--- a/lib/Target/CellSPU/SPUNopFiller.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The final pass just before assembly printing. This pass is the last
-// checkpoint where nops and lnops are added to the instruction stream to
-// satisfy the dual issue requirements. The actual dual issue scheduling is
-// done (TODO: nowhere, currently)
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- struct SPUNopFiller : public MachineFunctionPass {
-
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- const InstrItineraryData *IID;
- bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
-
- static char ID;
- SPUNopFiller(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
- IID(tm.getInstrItineraryData())
- {
- DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
- }
-
- virtual const char *getPassName() const {
- return "SPU nop/lnop Filler";
- }
-
- void runOnMachineBasicBlock(MachineBasicBlock &MBB);
-
- bool runOnMachineFunction(MachineFunction &F) {
- isEvenPlace = true; //all functions get an .align 3 directive at start
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- runOnMachineBasicBlock(*FI);
- return true; //never-ever do any more modifications, just print it!
- }
-
- typedef enum { none = 0, // no more instructions in this function / BB
- pseudo = 1, // this does not get executed
- even = 2,
- odd = 3 } SPUOpPlace;
- SPUOpPlace getOpPlacement( MachineInstr &instr );
-
- };
- char SPUNopFiller::ID = 0;
-
-}
-
-// Fill a BasicBlock to alignment.
-// In the assembly we align the functions to 'even' addresses, but
-// basic blocks have an implicit alignment. We hereby define
-// basic blocks to have the same, even, alignment.
-void SPUNopFiller::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
-{
- assert( isEvenPlace && "basic block starts at odd address");
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- {
- SPUOpPlace this_optype, next_optype;
- MachineBasicBlock::iterator J = I;
- J++;
-
- this_optype = getOpPlacement( *I );
- next_optype = none;
- while (J!=MBB.end()){
- next_optype = getOpPlacement( *J );
- ++J;
- if (next_optype != pseudo )
- break;
- }
-
- // pad: odd(wrong), even(wrong), ...
- // to: nop(corr), odd(corr), even(corr)...
- if( isEvenPlace && this_optype == odd && next_optype == even ) {
- DEBUG( dbgs() <<"Adding NOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
- isEvenPlace=false;
- }
-
- // pad: even(wrong), odd(wrong), ...
- // to: lnop(corr), even(corr), odd(corr)...
- else if ( !isEvenPlace && this_optype == even && next_optype == odd){
- DEBUG( dbgs() <<"Adding LNOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
- isEvenPlace=true;
- }
-
- // now go to next mem slot
- if( this_optype != pseudo )
- isEvenPlace = !isEvenPlace;
-
- }
-
- // pad basic block end
- if( !isEvenPlace ){
- MachineBasicBlock::iterator J = MBB.end();
- J--;
- if (getOpPlacement( *J ) == odd) {
- DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
- BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
- }
- else {
- J++;
- DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
- BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
- }
- isEvenPlace=true;
- }
-}
-
-FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
- return new SPUNopFiller(tm);
-}
-
-// Figure out if 'instr' is executed in the even or odd pipeline
-SPUNopFiller::SPUOpPlace
-SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
- int sc = instr.getDesc().getSchedClass();
- const InstrStage *stage = IID->beginStage(sc);
- unsigned FUs = stage->getUnits();
- SPUOpPlace retval;
-
- switch( FUs ) {
- case 0: retval = pseudo; break;
- case 1: retval = odd; break;
- case 2: retval = even; break;
- default: retval= pseudo;
- assert( false && "got unknown FuncUnit\n");
- break;
- };
- return retval;
-}
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
deleted file mode 100644
index 6f8deef5530f..000000000000
--- a/lib/Target/CellSPU/SPUOperands.td
+++ /dev/null
@@ -1,664 +0,0 @@
-//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instruction Operands:
-//===----------------------------------------------------------------------===//
-
-// TO_IMM32 - Convert an i8/i16 to i32.
-def TO_IMM32 : SDNodeXForm<imm, [{
- return getI32Imm(N->getZExtValue());
-}]>;
-
-// TO_IMM16 - Convert an i8/i32 to i16.
-def TO_IMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
-}]>;
-
-
-def LO16 : SDNodeXForm<imm, [{
- unsigned val = N->getZExtValue();
- // Transformation function: get the low 16 bits.
- return getI32Imm(val & 0xffff);
-}]>;
-
-def LO16_vec : SDNodeXForm<scalar_to_vector, [{
- SDValue OpVal(0, 0);
-
- // Transformation function: get the low 16 bit immediate from a build_vector
- // node.
- assert(N->getOpcode() == ISD::BUILD_VECTOR
- && "LO16_vec got something other than a BUILD_VECTOR");
-
- // Get first constant operand...
- for (unsigned i = 0, e = N->getNumOperands();
- OpVal.getNode() == 0 && i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- }
-
- assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
- ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
- return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
-}]>;
-
-// Transform an immediate, returning the high 16 bits shifted down:
-def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
-// Transformation function: shift the high 16 bit immediate from a build_vector
-// node into the low 16 bits, and return a 16-bit constant.
-def HI16_vec : SDNodeXForm<scalar_to_vector, [{
- SDValue OpVal(0, 0);
-
- assert(N->getOpcode() == ISD::BUILD_VECTOR
- && "HI16_vec got something other than a BUILD_VECTOR");
-
- // Get first constant operand...
- for (unsigned i = 0, e = N->getNumOperands();
- OpVal.getNode() == 0 && i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- }
-
- assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
- ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
- return getI32Imm((unsigned)CN->getZExtValue() >> 16);
-}]>;
-
-// simm7 predicate - True if the immediate fits in a 7-bit signed
-// field.
-def simm7: PatLeaf<(imm), [{
- int sextVal = int(N->getSExtValue());
- return (sextVal >= -64 && sextVal <= 63);
-}]>;
-
-// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
-// field.
-def uimm7: PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0x7f);
-}]>;
-
-// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
-// field.
-def immSExt8 : PatLeaf<(imm), [{
- int Value = int(N->getSExtValue());
- return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
-}]>;
-
-// immU8: immediate, unsigned 8-bit quantity
-def immU8 : PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0xff);
-}]>;
-
-// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i32ImmSExt10 : PatLeaf<(imm), [{
- return isI32IntS10Immediate(N);
-}]>;
-
-// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
-// field. Used by RI10Form instructions like 'ldq'.
-def i32ImmUns10 : PatLeaf<(imm), [{
- return isI32IntU10Immediate(N);
-}]>;
-
-// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i16ImmSExt10 : PatLeaf<(imm), [{
- return isI16IntS10Immediate(N);
-}]>;
-
-// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
-// value. Used by RI10Form instructions.
-def i16ImmUns10 : PatLeaf<(imm), [{
- return isI16IntU10Immediate(N);
-}]>;
-
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- short Ignored;
- return isIntS16Immediate(N, Ignored);
-}]>;
-
-def immZExt16 : PatLeaf<(imm), [{
- // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
- // field.
- return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
-}], LO16>;
-
-def immU16 : PatLeaf<(imm), [{
- // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
- return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
-}]>;
-
-def imm18 : PatLeaf<(imm), [{
- // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
- int Value = (int) N->getZExtValue();
- return isUInt<18>(Value);
-}]>;
-
-def lo16 : PatLeaf<(imm), [{
- // lo16 predicate - returns true if the immediate has all zeros in the
- // high order bits and is a 32-bit constant:
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = N->getZExtValue();
- return ((val & 0x0000ffff) == val);
- }
-
- return false;
-}], LO16>;
-
-def hi16 : PatLeaf<(imm), [{
- // hi16 predicate - returns true if the immediate has all zeros in the
- // low order bits and is a 32-bit constant:
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = uint32_t(N->getZExtValue());
- return ((val & 0xffff0000) == val);
- } else if (N->getValueType(0) == MVT::i64) {
- uint64_t val = N->getZExtValue();
- return ((val & 0xffff0000ULL) == val);
- }
-
- return false;
-}], HI16>;
-
-def bitshift : PatLeaf<(imm), [{
- // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
- // (shift left quadword by bits immediate)
- int64_t Val = N->getZExtValue();
- return (Val > 0 && Val <= 7);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Floating point operands:
-//===----------------------------------------------------------------------===//
-
-// Transform a float, returning the high 16 bits shifted down, as if
-// the float was really an unsigned integer:
-def HI16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) >> 16);
-}]>;
-
-// Transformation function on floats: get the low 16 bits as if the float was
-// an unsigned integer.
-def LO16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & 0xffff);
-}]>;
-
-def FPimm_sext16 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
-}]>;
-
-def FPimm_u18 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
-}]>;
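
These transforms treat the float as its raw IEEE-754 bit pattern (LLVM's FloatToBits) and then split it exactly like the integer HI16/LO16 transforms, which is what lets f32 constants reuse the ILHU/IOHL materialization. A portable C++ equivalent of the bit reinterpretation, as a sketch:

    #include <cstdint>
    #include <cstring>

    uint32_t float_to_bits(float f) {     // portable FloatToBits()
      uint32_t u;
      std::memcpy(&u, &f, sizeof u);
      return u;
    }

    uint32_t hi16_f32(float f) { return float_to_bits(f) >> 16; }
    uint32_t lo16_f32(float f) { return float_to_bits(f) & 0xffff; }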
-
-def fpimmSExt16 : PatLeaf<(fpimm), [{
- short Ignored;
- return isFPS16Immediate(N, Ignored);
-}], FPimm_sext16>;
-
-// Does the SFP constant only have the upper 16 bits set?
-def hi16_f32 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
- return ((val & 0xffff0000) == val);
- }
-
- return false;
-}], HI16_f32>;
-
-// Does the SFP constant fit into 18 bits?
-def fpimm18 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
- return isUInt<18>(Value);
- }
-
- return false;
-}], FPimm_u18>;
-
-//===----------------------------------------------------------------------===//
-// 64-bit operands (TODO):
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// build_vector operands:
-//===----------------------------------------------------------------------===//
-
-// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies on
-// the incoming constant being a 16-bit quantity, where the upper and lower bytes
-// are EXACTLY the same (e.g., 0x2a2a)
-def v16i8SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8SExt8Imm_xform>;
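
The byte-splat requirement described above -- a 16-bit immediate whose upper and lower bytes match, such as 0x2a2a -- is a one-line check; the constraint exists because the hardware splats halfwords, so a byte splat must already look like a repeated byte within each halfword. A sketch of the check:

    #include <cstdint>

    // True for immediates like 0x2a2a that the v16i8 predicates accept.
    bool is_byte_splat16(uint16_t v) {
      return (v >> 8) == (v & 0xff);
    }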
-
-// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies on
-// the incoming constant being a 16-bit quantity, where the upper and lower bytes
-// are EXACTLY the same (e.g., 0x2a2a)
-def v16i8U8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8U8Imm_xform>;
-
-// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt8Imm_xform>;
-
-// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt10Imm_xform>;
-
-// v8i16Uns10Imm_xform function: convert build_vector to 10-bit unsigned
-// immediate constant load for v8i16 vectors.
-def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16Uns10Imm: Predicate test for 10-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v8i16Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns10Imm_xform>;
-
-// v8i16Uns16Imm_xform function: convert build_vector to 16-bit
-// immediate constant load for v8i16 vectors.
-def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns16Imm_xform>;
-
-// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt10Imm_xform>;
-
-// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v4i32Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns10Imm_xform>;
-
-// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt16Imm_xform>;
-
-// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v4i32Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns18Imm_xform>;
-
-// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
-// load.
-def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
-}]>;
-
-/// immILHUvec: Predicate test for an ILHU constant vector.
-def immILHUvec: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i32 vector constants
-def v4i32_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v4i32_imm(N, *CurDAG);
-}]>;
-
-def v4i32Imm: PatLeaf<(build_vector), [{
- return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
-}], v4i32_get_imm>;
-
-// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt10Imm_xform>;
-
-// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt16Imm_xform>;
-
-// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v2i64 vectors.
-def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v2i64Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64Uns18Imm_xform>;
-
-/// immILHUvec_i64: Predicate test for an ILHU constant vector (i64).
-def immILHUvec_i64: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i64 vector constants
-def v2i64_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v2i64_imm(N, *CurDAG);
-}]>;
-
-def v2i64Imm: PatLeaf<(build_vector), [{
- return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
-}], v2i64_get_imm>;
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-
-def s7imm: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def s7imm_i8: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def u7imm: Operand<i16> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i8: Operand<i8> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i32: Operand<i32> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-// Halfword, signed 10-bit constant
-def s10imm : Operand<i16> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i8: Operand<i8> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i32: Operand<i32> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i64: Operand<i64> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-// Unsigned 10-bit integers:
-def u10imm: Operand<i16> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i8: Operand<i8> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i32: Operand<i32> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def s16imm : Operand<i16> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i8: Operand<i8> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i32: Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i64: Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f32: Operand<f32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f64: Operand<f64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def u16imm_i64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm_i32 : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def f16imm : Operand<f32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def s18imm : Operand<i32> {
- let PrintMethod = "printS18ImmOperand";
-}
-
-def u18imm : Operand<i32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def u18imm_i64 : Operand<i64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm : Operand<f32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm_f64 : Operand<f64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-// Negated 7-bit halfword rotate immediate operands
-def rothNeg7imm : Operand<i32> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-def rothNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-// Negated 7-bit word rotate immediate operands
-def rotNeg7imm : Operand<i32> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i8 : Operand<i8> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def target : Operand<OtherVT> {
- let PrintMethod = "printBranchOperand";
-}
-
-// Absolute address call target
-def calltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops u18imm:$calldest);
-}
-
-// PC relative call target
-def relcalltarget : Operand<iPTR> {
- let PrintMethod = "printPCRelativeOperand";
- let MIOperandInfo = (ops s16imm:$calldest);
-}
-
-// Branch targets:
-def brtarget : Operand<OtherVT> {
- let PrintMethod = "printPCRelativeOperand";
-}
-
-// Hint for branch target
-def hbrtarget : Operand<OtherVT> {
- let PrintMethod = "printHBROperand";
-}
-
-// Indirect call target
-def indcalltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops ptr_rc:$calldest);
-}
-
-def symbolHi: Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-
-def symbolLo: Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
-
-def symbolLSA: Operand<i32> {
- let PrintMethod = "printSymbolLSA";
-}
-
-// Shuffle address memory operand [s7imm(reg) d-format]
-def shufaddr : Operand<iPTR> {
- let PrintMethod = "printShufAddr";
- let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
-}
-
-// memory s10imm(reg) operand
-def dformaddr : Operand<iPTR> {
- let PrintMethod = "printDFormAddr";
- let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
-}
-
-// 256K local store address
-// N.B.: The tblgen code generator expects to have two operands, an offset
-// and a pointer. Of these, only the immediate is actually used.
-def addr256k : Operand<iPTR> {
- let PrintMethod = "printAddr256K";
- let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
-}
-
-// memory s18imm(reg) operand
-def memri18 : Operand<iPTR> {
- let PrintMethod = "printMemRegImmS18";
- let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
-}
-
-// memory register + register operand
-def memrr : Operand<iPTR> {
- let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
-}
-
-// Define SPU-specific addressing modes: These come in three basic
-// flavors:
-//
-// D-form : [r+I10] (10-bit signed offset + reg)
-// X-form : [r+r] (reg+reg)
-// A-form : abs (256K LSA offset)
-// D-form(2): [r+I7] (7-bit signed offset + reg)
-
-def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
- [], [SDNPWantRoot]>;
-def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
- [], [SDNPWantRoot]>;
-def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
- [], [SDNPWantRoot]>;
-def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
- [], [SDNPWantRoot]>;
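
The four ComplexPatterns correspond to the address shapes listed in the comment above; in effect each selector proves an address fits one of these computations. A schematic C++ model of what each form computes (not the selector logic itself):

    #include <cstdint>

    // Local-store effective addresses, schematically:
    uint32_t dform(uint32_t reg, int32_t i10) { return reg + i10; } // [r+I10]
    uint32_t xform(uint32_t ra, uint32_t rb)  { return ra + rb;   } // [r+r]
    uint32_t aform(uint32_t lsa)              { return lsa;       } // abs, 256K LSA
    uint32_t dform2(uint32_t reg, int32_t i7) { return reg + i7;  } // [r+I7]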
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
deleted file mode 100644
index e6c872d0bbb7..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "reginfo"
-#include "SPURegisterInfo.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUSubtarget.h"
-#include "SPUMachineFunction.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <cstdlib>
-
-#define GET_REGINFO_TARGET_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// SPU::R14, return the number that it corresponds to (e.g. 14).
-unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace SPU;
- switch (RegEnum) {
- case SPU::R0: return 0;
- case SPU::R1: return 1;
- case SPU::R2: return 2;
- case SPU::R3: return 3;
- case SPU::R4: return 4;
- case SPU::R5: return 5;
- case SPU::R6: return 6;
- case SPU::R7: return 7;
- case SPU::R8: return 8;
- case SPU::R9: return 9;
- case SPU::R10: return 10;
- case SPU::R11: return 11;
- case SPU::R12: return 12;
- case SPU::R13: return 13;
- case SPU::R14: return 14;
- case SPU::R15: return 15;
- case SPU::R16: return 16;
- case SPU::R17: return 17;
- case SPU::R18: return 18;
- case SPU::R19: return 19;
- case SPU::R20: return 20;
- case SPU::R21: return 21;
- case SPU::R22: return 22;
- case SPU::R23: return 23;
- case SPU::R24: return 24;
- case SPU::R25: return 25;
- case SPU::R26: return 26;
- case SPU::R27: return 27;
- case SPU::R28: return 28;
- case SPU::R29: return 29;
- case SPU::R30: return 30;
- case SPU::R31: return 31;
- case SPU::R32: return 32;
- case SPU::R33: return 33;
- case SPU::R34: return 34;
- case SPU::R35: return 35;
- case SPU::R36: return 36;
- case SPU::R37: return 37;
- case SPU::R38: return 38;
- case SPU::R39: return 39;
- case SPU::R40: return 40;
- case SPU::R41: return 41;
- case SPU::R42: return 42;
- case SPU::R43: return 43;
- case SPU::R44: return 44;
- case SPU::R45: return 45;
- case SPU::R46: return 46;
- case SPU::R47: return 47;
- case SPU::R48: return 48;
- case SPU::R49: return 49;
- case SPU::R50: return 50;
- case SPU::R51: return 51;
- case SPU::R52: return 52;
- case SPU::R53: return 53;
- case SPU::R54: return 54;
- case SPU::R55: return 55;
- case SPU::R56: return 56;
- case SPU::R57: return 57;
- case SPU::R58: return 58;
- case SPU::R59: return 59;
- case SPU::R60: return 60;
- case SPU::R61: return 61;
- case SPU::R62: return 62;
- case SPU::R63: return 63;
- case SPU::R64: return 64;
- case SPU::R65: return 65;
- case SPU::R66: return 66;
- case SPU::R67: return 67;
- case SPU::R68: return 68;
- case SPU::R69: return 69;
- case SPU::R70: return 70;
- case SPU::R71: return 71;
- case SPU::R72: return 72;
- case SPU::R73: return 73;
- case SPU::R74: return 74;
- case SPU::R75: return 75;
- case SPU::R76: return 76;
- case SPU::R77: return 77;
- case SPU::R78: return 78;
- case SPU::R79: return 79;
- case SPU::R80: return 80;
- case SPU::R81: return 81;
- case SPU::R82: return 82;
- case SPU::R83: return 83;
- case SPU::R84: return 84;
- case SPU::R85: return 85;
- case SPU::R86: return 86;
- case SPU::R87: return 87;
- case SPU::R88: return 88;
- case SPU::R89: return 89;
- case SPU::R90: return 90;
- case SPU::R91: return 91;
- case SPU::R92: return 92;
- case SPU::R93: return 93;
- case SPU::R94: return 94;
- case SPU::R95: return 95;
- case SPU::R96: return 96;
- case SPU::R97: return 97;
- case SPU::R98: return 98;
- case SPU::R99: return 99;
- case SPU::R100: return 100;
- case SPU::R101: return 101;
- case SPU::R102: return 102;
- case SPU::R103: return 103;
- case SPU::R104: return 104;
- case SPU::R105: return 105;
- case SPU::R106: return 106;
- case SPU::R107: return 107;
- case SPU::R108: return 108;
- case SPU::R109: return 109;
- case SPU::R110: return 110;
- case SPU::R111: return 111;
- case SPU::R112: return 112;
- case SPU::R113: return 113;
- case SPU::R114: return 114;
- case SPU::R115: return 115;
- case SPU::R116: return 116;
- case SPU::R117: return 117;
- case SPU::R118: return 118;
- case SPU::R119: return 119;
- case SPU::R120: return 120;
- case SPU::R121: return 121;
- case SPU::R122: return 122;
- case SPU::R123: return 123;
- case SPU::R124: return 124;
- case SPU::R125: return 125;
- case SPU::R126: return 126;
- case SPU::R127: return 127;
- default:
- report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
- }
-}
-
-SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
- const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
-{
-}
-
-/// getPointerRegClass - Return the register class to use to hold pointers.
-/// This is used for addressing modes.
-const TargetRegisterClass *
-SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
- return &SPU::R32CRegClass;
-}
-
-const uint16_t *
-SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
-{
- // Cell ABI calling convention
- static const uint16_t SPU_CalleeSaveRegs[] = {
- SPU::R80, SPU::R81, SPU::R82, SPU::R83,
- SPU::R84, SPU::R85, SPU::R86, SPU::R87,
- SPU::R88, SPU::R89, SPU::R90, SPU::R91,
- SPU::R92, SPU::R93, SPU::R94, SPU::R95,
- SPU::R96, SPU::R97, SPU::R98, SPU::R99,
- SPU::R100, SPU::R101, SPU::R102, SPU::R103,
- SPU::R104, SPU::R105, SPU::R106, SPU::R107,
- SPU::R108, SPU::R109, SPU::R110, SPU::R111,
- SPU::R112, SPU::R113, SPU::R114, SPU::R115,
- SPU::R116, SPU::R117, SPU::R118, SPU::R119,
- SPU::R120, SPU::R121, SPU::R122, SPU::R123,
- SPU::R124, SPU::R125, SPU::R126, SPU::R127,
- SPU::R2, /* environment pointer */
- SPU::R1, /* stack pointer */
- SPU::R0, /* link register */
- 0 /* end */
- };
-
- return SPU_CalleeSaveRegs;
-}
-
-/*!
- R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
- generally unused) are the Cell's reserved registers
- */
-BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(SPU::R0); // LR
- Reserved.set(SPU::R1); // SP
- Reserved.set(SPU::R2); // environment pointer
- return Reserved;
-}
-
-//===----------------------------------------------------------------------===//
-// Stack Frame Processing methods
-//===----------------------------------------------------------------------===//
-
-//--------------------------------------------------------------------------
-void
-SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I)
- const
-{
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-void
-SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
-{
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- DebugLoc dl = II->getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &SPOp = MI.getOperand(i);
- int FrameIndex = SPOp.getIndex();
-
- // Now add the frame object offset to the offset from r1.
- int Offset = MFI->getObjectOffset(FrameIndex);
-
- // Most instructions, except for generated FrameIndex additions using AIr32
- // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
- // immediate in operand 2.
- unsigned OpNo = 1;
- if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
- OpNo = 2;
-
- MachineOperand &MO = MI.getOperand(OpNo);
-
- // Offset is biased by $lr's slot at the bottom.
- Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
- assert((Offset & 0xf) == 0
- && "16-byte alignment violated in eliminateFrameIndex");
-
- // Replace the FrameIndex with the base register $sp (aka $r1)
- SPOp.ChangeToRegister(SPU::R1, false);
-
- // If 'Offset' doesn't fit into the D-form instruction's
- // immediate, convert the instruction to X-form.
- // If the instruction is not an AI (which takes an s10 immediate), assume
- // it is a load/store that can take an s14 immediate.
- if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
- || !isInt<14>(Offset)) {
- int newOpcode = convertDFormToXForm(MI.getOpcode());
- unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
- BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
- .addImm(Offset);
- BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
- .addReg(tmpReg, RegState::Kill)
- .addReg(SPU::R1);
- // remove the replaced D-form instruction
- MBB.erase(II);
- } else {
- MO.ChangeToImmediate(Offset);
- }
-}
-
-unsigned
-SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
-{
- return SPU::R1;
-}
-
-int
-SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
-{
- switch(dFormOpcode)
- {
- case SPU::AIr32: return SPU::Ar32;
- case SPU::LQDr32: return SPU::LQXr32;
- case SPU::LQDr128: return SPU::LQXr128;
- case SPU::LQDv16i8: return SPU::LQXv16i8;
- case SPU::LQDv4i32: return SPU::LQXv4i32;
- case SPU::LQDv4f32: return SPU::LQXv4f32;
- case SPU::STQDr32: return SPU::STQXr32;
- case SPU::STQDr128: return SPU::STQXr128;
- case SPU::STQDv16i8: return SPU::STQXv16i8;
- case SPU::STQDv4i32: return SPU::STQXv4i32;
- case SPU::STQDv4f32: return SPU::STQXv4f32;
-
- default: assert(false && "Unhandled D to X-form conversion");
- }
- // The default case asserts, but we need to return something to keep
- // the compiler happy.
- return dFormOpcode;
-}
-
-// TODO: this is copied from PPC. Could this convenience function be
-// moved to the RegScavenger class?
-unsigned
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const
-{
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- assert(Reg && "Register scavenger failed");
- return Reg;
-}
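The D-form/X-form decision above hinges on whether the computed offset fits the instruction's signed immediate field: s10 for AIr32, s14 for the loads and stores. A minimal self-contained sketch of that range test, with fitsSignedImm as a hypothetical stand-in for LLVM's isInt<N>:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for llvm::isInt<N>: true iff x fits in an
    // N-bit two's-complement signed immediate.
    template <unsigned N> bool fitsSignedImm(int64_t x) {
      return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
    }

    int main() {
      assert(fitsSignedImm<10>(511) && !fitsSignedImm<10>(512));   // s10 range
      assert(fitsSignedImm<14>(8191) && !fitsSignedImm<14>(8192)); // s14 range
      return 0;
    }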
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
deleted file mode 100644
index e9f9aba63a48..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTERINFO_H
-#define SPU_REGISTERINFO_H
-
-#include "SPU.h"
-
-#define GET_REGINFO_HEADER
-#include "SPUGenRegisterInfo.inc"
-
-namespace llvm {
- class SPUSubtarget;
- class TargetInstrInfo;
- class Type;
-
- class SPURegisterInfo : public SPUGenRegisterInfo {
- private:
- const SPUSubtarget &Subtarget;
- const TargetInstrInfo &TII;
-
- //! Predicate: Does the machine function use the link register?
- bool usesLR(MachineFunction &MF) const;
-
- public:
- SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-
- //! Translate a register's enum value to a register number
- /*!
- This method translates a register's enum value to its register number,
- e.g. SPU::R14 -> 14.
- */
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
- /// getPointerRegClass - Return the register class to use to hold pointers.
- /// This is used for addressing modes.
- virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
-
- /// After allocating this many registers, the allocator should feel
- /// register pressure. The value is a somewhat arbitrary guess, based on
- /// the number of non-callee-saved registers in the C calling convention.
- virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- return 50;
- }
-
- //! Return the array of callee-saved registers
- virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
-
- //! Allow for scavenging, so we can get scratch registers when needed.
- virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
- { return true; }
-
- //! Enable tracking of liveness after register allocation, since register
- // scavenging is enabled.
- virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
- { return true; }
-
- //! Return the reserved registers
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- //! Eliminate the call frame setup pseudo-instructions
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- //! Convert frame indices into machine operands
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS = NULL) const;
-
- //! Get the stack frame register (SP, aka R1)
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- //------------------------------------------------------------------------
- // New methods added:
- //------------------------------------------------------------------------
-
- //! Convert D-form load/store to X-form load/store
- /*!
- Converts a register-displacement load/store into a register-indexed
- load/store for large stack frames, when the stack frame exceeds the
- range of an s10 displacement.
- */
- int convertDFormToXForm(int dFormOpcode) const;
-
- //! Acquire an unused register in an emergency.
- unsigned findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const;
-
- };
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
deleted file mode 100644
index f27b042edd63..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ /dev/null
@@ -1,183 +0,0 @@
-//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-class SPUReg<string n> : Register<n> {
- let Namespace = "SPU";
-}
-
-// The SPU's registers are all 128 bits wide, which makes specifying the
-// registers relatively easy, if relatively mundane:
-
-class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
- field bits<7> Num = num;
-}
-
-def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
-def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
-def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
-def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
-def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
-def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
-def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
-def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
-def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
-def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
-def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
-def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
-def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
-def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
-def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
-def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
-def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
-def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
-def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
-def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
-def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
-def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
-def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
-def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
-def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
-def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
-def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
-def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
-def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
-def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
-def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
-def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
-def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
-def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
-def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
-def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
-def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
-def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
-def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
-def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
-def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
-def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
-def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
-def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
-def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
-def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
-def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
-def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
-def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
-def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
-def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
-def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
-def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
-def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
-def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
-def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
-def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
-def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
-def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
-def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
-def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
-def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
-def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
-def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
-def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
-def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
-def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
-def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
-def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
-def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
-def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
-def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
-def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
-def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
-def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
-def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
-def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
-def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
-def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
-def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
-def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
-def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
-def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
-def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
-def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
-def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
-def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
-def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
-def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
-def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
-def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
-def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
-def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
-def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
-def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
-def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
-def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
-def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
-def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
-def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
-def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
-def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
-def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
-def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
-def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
-def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
-def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
-def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
-def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
-def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
-def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
-def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
-def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
-def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
-def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
-def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
-def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
-def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
-def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
-def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
-def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
-def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
-def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
-def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
-def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
-def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
-def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
-def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
-
-/* Need floating point status register here: */
-/* def FPCSR : ... */
-
-// The SPU's registers are 128-bit-wide entities that can function as
-// general-purpose registers, with operands in the "preferred slot":
-// The non-volatile registers are allocated in reverse order, as on PPC.
-def GPRC : RegisterClass<"SPU", [i128], 128,
- (add (sequence "R%u", 0, 79),
- (sequence "R%u", 127, 80))>;
-
-// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
-def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
-
-// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
-def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
-
-// The SPU's registers as 32-bit wide (word) "preferred slot":
-def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
-
-// The SPU's registers as single precision floating point "preferred slot":
-def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
-
-// The SPU's registers as 16-bit wide (halfword) "preferred slot":
-def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
-
-// The SPU's registers as 8-bit wide (byte) "preferred slot":
-def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
-
-// The SPU's registers as vector registers:
-def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
- (add GPRC)>;
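The (add (sequence ...)) expression above fixes the allocation order: the volatile registers R0..R79 first, then the non-volatile ones from R127 down to R80. A short sketch (illustrative only, not generated code) of the same ordering:

    #include <vector>

    // GPRC allocation order as described above: R0..R79, then R127..R80.
    std::vector<unsigned> gprcAllocationOrder() {
      std::vector<unsigned> Order;
      for (unsigned R = 0; R <= 79; ++R)
        Order.push_back(R);
      for (unsigned R = 127; R >= 80; --R)
        Order.push_back(R);
      return Order;
    }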
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
deleted file mode 100644
index e557ed340a28..000000000000
--- a/lib/Target/CellSPU/SPURegisterNames.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTER_NAMES_H
-#define SPU_REGISTER_NAMES_H
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
deleted file mode 100644
index 9ccd0844e48e..000000000000
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Functional units (even and odd pipelines):
-
-def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
-def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Cell SPU
-//===----------------------------------------------------------------------===//
-
-def LoadStore : InstrItinClass; // ODD_UNIT
-def BranchHints : InstrItinClass; // ODD_UNIT
-def BranchResolv : InstrItinClass; // ODD_UNIT
-def ChanOpSPR : InstrItinClass; // ODD_UNIT
-def ShuffleOp : InstrItinClass; // ODD_UNIT
-def SelectOp : InstrItinClass; // ODD_UNIT
-def GatherOp : InstrItinClass; // ODD_UNIT
-def LoadNOP : InstrItinClass; // ODD_UNIT
-def ExecNOP : InstrItinClass; // EVEN_UNIT
-def SPrecFP : InstrItinClass; // EVEN_UNIT
-def DPrecFP : InstrItinClass; // EVEN_UNIT
-def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
-def ByteOp : InstrItinClass; // EVEN_UNIT
-def IntegerOp : InstrItinClass; // EVEN_UNIT
-def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
-def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
-def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
-def ImmLoad : InstrItinClass; // EVEN_UNIT
-
-/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
- InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
- InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
- InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
- InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
- InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
- InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
- ]>;
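Conceptually, the itinerary above is a table mapping each instruction class to a pipeline and a latency. A few rows rendered as hypothetical C++, purely for illustration:

    enum Pipe { EvenUnit, OddUnit };
    struct ItinRow { Pipe P; unsigned Latency; };

    // A few rows of the SPU itinerary above (illustrative only).
    const ItinRow LoadStoreRow = { OddUnit,   6 }; // loads/stores issue odd
    const ItinRow DPrecFPRow   = { EvenUnit, 13 }; // double-precision FP
    const ItinRow IntegerOpRow = { EvenUnit,  2 }; // simple integer ops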
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
deleted file mode 100644
index eec2d250be7f..000000000000
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUSubtarget.h"
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "SPUGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS) :
- SPUGenSubtargetInfo(TT, CPU, FS),
- StackAlignment(16),
- ProcDirective(SPU::DEFAULT_PROC),
- UseLargeMem(false)
-{
- // Should be the target SPU processor type. For now, since there's only
- // one, simply default to "v0":
- std::string default_cpu("v0");
-
- // Parse features string.
- ParseSubtargetFeatures(default_cpu, FS);
-
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(default_cpu);
-}
-
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void SPUSubtarget::SetJITMode() {
-}
-
-/// Enable PostRA scheduling for optimization levels -O2 and -O3.
-bool SPUSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- // CriticalPathRCs is the set of register classes that anti-dependency
- // breaking is performed for. Do it for all register classes.
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&SPU::R8CRegClass);
- CriticalPathRCs.push_back(&SPU::R16CRegClass);
- CriticalPathRCs.push_back(&SPU::R32CRegClass);
- CriticalPathRCs.push_back(&SPU::R32FPRegClass);
- CriticalPathRCs.push_back(&SPU::R64CRegClass);
- CriticalPathRCs.push_back(&SPU::VECREGRegClass);
- return OptLevel >= CodeGenOpt::Default;
-}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
deleted file mode 100644
index 27d28b22dd04..000000000000
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CELLSUBTARGET_H
-#define CELLSUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "SPUGenSubtargetInfo.inc"
-
-namespace llvm {
- class GlobalValue;
- class StringRef;
-
- namespace SPU {
- enum {
- PROC_NONE,
- DEFAULT_PROC
- };
- }
-
- class SPUSubtarget : public SPUGenSubtargetInfo {
- protected:
- /// stackAlignment - The minimum alignment known to hold of the stack frame
- /// on entry to the function and which must be maintained by every function.
- unsigned StackAlignment;
-
- /// Selected instruction itineraries (one entry per itinerary class.)
- InstrItineraryData InstrItins;
-
- /// Which SPU processor (this isn't really used, but it's there to keep
- /// the C compiler happy)
- unsigned ProcDirective;
-
- /// Use (assume) large memory -- effectively disables the LQA/STQA
- /// instructions that assume a 256K local store.
- bool UseLargeMem;
-
- public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ///
- SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses the features string, setting the
- /// specified subtarget options. The definition of this function is
- /// auto-generated by tblgen.
-
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
- /// getStackAlignment - Returns the minimum alignment known to hold of the
- /// stack frame on entry to the function and which must be maintained by
- /// every function for this subtarget.
- unsigned getStackAlignment() const { return StackAlignment; }
-
- /// getInstrItineraryData - Return the instruction itineraries based on
- /// subtarget selection.
- const InstrItineraryData &getInstrItineraryData() const {
- return InstrItins;
- }
-
- /// Use large memory addressing predicate
- bool usingLargeMem() const {
- return UseLargeMem;
- }
-
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
- "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
- "-s:128:128-n32:64";
- }
-
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
- };
-} // End llvm namespace
-
-#endif
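For readers decoding getDataLayoutString above, an informal reading of the fields (no new API involved):

    // E            - big-endian
    // p:32:32:128  - 32-bit pointers, 32-bit ABI alignment, 128-bit preferred
    // iN:A:P, fN:A:P, vN:A:P
    //              - integer/float/vector alignments; everything prefers
    //                128 bits, matching the 16-byte-wide register file
    // a:0:128      - aggregates; s:128:128 - stack objects
    // n32:64       - native integer widths are 32 and 64 bits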
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
deleted file mode 100644
index 918316572a2e..000000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUTargetMachine.h"
-#include "SPU.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeCellSPUTarget() {
- // Register the target.
- RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
-}
-
-const std::pair<unsigned, int> *
-SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
- NumEntries = 1;
- return &LR[0];
-}
-
-SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL(Subtarget.getDataLayoutString()),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this),
- TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Pipeline Configuration
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// SPU Code Generator Pass Configuration Options.
-class SPUPassConfig : public TargetPassConfig {
-public:
- SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- SPUTargetMachine &getSPUTargetMachine() const {
- return getTM<SPUTargetMachine>();
- }
-
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
-};
-} // namespace
-
-TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new SPUPassConfig(this, PM);
-}
-
-bool SPUPassConfig::addInstSelector() {
- // Install an instruction selector.
- addPass(createSPUISelDag(getSPUTargetMachine()));
- return false;
-}
-
-// Passes to run just before printing the assembly.
-bool SPUPassConfig::addPreEmitPass() {
- // Load the TCE instruction scheduler, if it is available via loaded
- // plugins.
- typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
- BuilderFunc schedulerCreator =
- (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
- "createTCESchedulerPass");
- if (schedulerCreator != NULL)
- addPass(schedulerCreator("cellspu"));
-
- // Align instructions with nops/lnops for dual issue.
- addPass(createSPUNopFillerPass(getSPUTargetMachine()));
- return true;
-}
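addPreEmitPass above resolves an optional scheduler factory from any loaded plugin at runtime. A minimal sketch of that lookup pattern, assuming only the DynamicLibrary API already used in the hunk (the factory type and symbol are placeholders):

    #include <cstdint>
    #include "llvm/Support/DynamicLibrary.h"

    typedef void *(*FactoryFunc)(const char *);

    // Returns the plugin-created object, or null so the caller can keep
    // its default pipeline when no plugin provides the symbol.
    void *tryCreateFromPlugin(const char *Symbol, const char *Arg) {
      void *Addr = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Symbol);
      if (!Addr)
        return 0;
      return ((FactoryFunc)(intptr_t)Addr)(Arg);
    }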
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
deleted file mode 100644
index 7f53ea6fbeb2..000000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the CellSPU-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_TARGETMACHINE_H
-#define SPU_TARGETMACHINE_H
-
-#include "SPUSubtarget.h"
-#include "SPUInstrInfo.h"
-#include "SPUISelLowering.h"
-#include "SPUSelectionDAGInfo.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
-
-namespace llvm {
-
-/// SPUTargetMachine
-///
-class SPUTargetMachine : public LLVMTargetMachine {
- SPUSubtarget Subtarget;
- const DataLayout DL;
- SPUInstrInfo InstrInfo;
- SPUFrameLowering FrameLowering;
- SPUTargetLowering TLInfo;
- SPUSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
-public:
- SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-
- /// Return the subtarget implementation object
- virtual const SPUSubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
- virtual const SPUInstrInfo *getInstrInfo() const {
- return &InstrInfo;
- }
- virtual const SPUFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- /*!
- \note Cell SPU does not support JIT today. It could support JIT at some
- point.
- */
- virtual TargetJITInfo *getJITInfo() {
- return NULL;
- }
-
- virtual const SPUTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual const SPURegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const DataLayout *getDataLayout() const {
- return &DL;
- }
-
- virtual const InstrItineraryData *getInstrItineraryData() const {
- return &InstrItins;
- }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo() const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo() const {
- return &VTTI;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 6a98f95db664..000000000000
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMCellSPUInfo
- CellSPUTargetInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
deleted file mode 100644
index 84aadfad6f8d..000000000000
--- a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheCellSPUTarget;
-
-extern "C" void LLVMInitializeCellSPUTargetInfo() {
- RegisterTarget<Triple::cellspu>
- X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
-}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 5c909903f94b..3e69098edcc3 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -13,25 +13,25 @@
//===----------------------------------------------------------------------===//
#include "CPPTargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
#include <algorithm>
#include <cstdio>
#include <map>
@@ -141,7 +141,7 @@ namespace {
std::string getCppName(const Value* val);
inline void printCppName(const Value* val);
- void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ void printAttributes(const AttributeSet &PAL, const std::string &name);
void printType(Type* Ty);
void printTypes(const Module* M);
@@ -464,24 +464,25 @@ void CppWriter::printCppName(const Value* val) {
printEscapedString(getCppName(val));
}
-void CppWriter::printAttributes(const AttrListPtr &PAL,
+void CppWriter::printAttributes(const AttributeSet &PAL,
const std::string &name) {
- Out << "AttrListPtr " << name << "_PAL;";
+ Out << "AttributeSet " << name << "_PAL;";
nl(Out);
if (!PAL.isEmpty()) {
Out << '{'; in(); nl(Out);
- Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
- Out << "AttributeWithIndex PAWI;"; nl(Out);
+ Out << "SmallVector<AttributeSet, 4> Attrs;"; nl(Out);
+ Out << "AttributeSet PAS;"; in(); nl(Out);
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
- unsigned index = PAL.getSlot(i).Index;
- AttrBuilder attrs(PAL.getSlot(i).Attrs);
- Out << "PAWI.Index = " << index << "U;\n";
- Out << " {\n AttrBuilder B;\n";
-
-#define HANDLE_ATTR(X) \
- if (attrs.hasAttribute(Attributes::X)) \
- Out << " B.addAttribute(Attributes::" #X ");\n"; \
- attrs.removeAttribute(Attributes::X);
+ unsigned index = PAL.getSlotIndex(i);
+ AttrBuilder attrs(PAL.getSlotAttributes(i), index);
+ Out << "{"; in(); nl(Out);
+ Out << "AttrBuilder B;"; nl(Out);
+
+#define HANDLE_ATTR(X) \
+ if (attrs.contains(Attribute::X)) { \
+ Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \
+ attrs.removeAttribute(Attribute::X); \
+ }
HANDLE_ATTR(SExt);
HANDLE_ATTR(ZExt);
@@ -499,6 +500,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(StackProtectStrong);
HANDLE_ATTR(NoCapture);
HANDLE_ATTR(NoRedZone);
HANDLE_ATTR(NoImplicitFloat);
@@ -509,16 +511,24 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NonLazyBind);
HANDLE_ATTR(MinSize);
#undef HANDLE_ATTR
- if (attrs.hasAttribute(Attributes::StackAlignment))
- Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n";
- attrs.removeAttribute(Attributes::StackAlignment);
- assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }";
- nl(Out);
- Out << "Attrs.push_back(PAWI);";
+
+ if (attrs.contains(Attribute::StackAlignment)) {
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')';
+ nl(Out);
+ attrs.removeAttribute(Attribute::StackAlignment);
+ }
+
+ Out << "PAS = AttributeSet::get(mod->getContext(), ";
+ if (index == ~0U)
+ Out << "~0U,";
+ else
+ Out << index << "U,";
+ Out << " B);"; out(); nl(Out);
+ Out << "}"; out(); nl(Out);
nl(Out);
+ Out << "Attrs.push_back(PAS);"; nl(Out);
}
- Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);";
+ Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);";
nl(Out);
out(); nl(Out);
Out << '}'; nl(Out);
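The rewritten emitter above targets the AttributeSet API: build one AttrBuilder per slot, wrap it with its slot index, collect the per-slot sets, and merge. A condensed fragment of that sequence, assuming LLVM 3.3-era headers (llvm/IR/Attributes.h) and a Module M in scope; the SExt attribute is an arbitrary example:

    SmallVector<AttributeSet, 4> Attrs;
    AttrBuilder B;
    B.addAttribute(Attribute::SExt);
    // Index 1U = first parameter; ~0U would mean function attributes.
    Attrs.push_back(AttributeSet::get(M.getContext(), 1U, B));
    AttributeSet PAL = AttributeSet::get(M.getContext(), Attrs);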
@@ -1888,23 +1898,24 @@ void CppWriter::printModuleBody() {
void CppWriter::printProgram(const std::string& fname,
const std::string& mName) {
- Out << "#include <llvm/LLVMContext.h>\n";
- Out << "#include <llvm/Module.h>\n";
- Out << "#include <llvm/DerivedTypes.h>\n";
- Out << "#include <llvm/Constants.h>\n";
- Out << "#include <llvm/GlobalVariable.h>\n";
- Out << "#include <llvm/Function.h>\n";
- Out << "#include <llvm/CallingConv.h>\n";
- Out << "#include <llvm/BasicBlock.h>\n";
- Out << "#include <llvm/Instructions.h>\n";
- Out << "#include <llvm/InlineAsm.h>\n";
- Out << "#include <llvm/Support/FormattedStream.h>\n";
- Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <llvm/Pass.h>\n";
Out << "#include <llvm/PassManager.h>\n";
+
Out << "#include <llvm/ADT/SmallVector.h>\n";
Out << "#include <llvm/Analysis/Verifier.h>\n";
Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <llvm/IR/BasicBlock.h>\n";
+ Out << "#include <llvm/IR/CallingConv.h>\n";
+ Out << "#include <llvm/IR/Constants.h>\n";
+ Out << "#include <llvm/IR/DerivedTypes.h>\n";
+ Out << "#include <llvm/IR/Function.h>\n";
+ Out << "#include <llvm/IR/GlobalVariable.h>\n";
+ Out << "#include <llvm/IR/InlineAsm.h>\n";
+ Out << "#include <llvm/IR/Instructions.h>\n";
+ Out << "#include <llvm/IR/LLVMContext.h>\n";
+ Out << "#include <llvm/IR/Module.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <algorithm>\n";
Out << "using namespace llvm;\n\n";
Out << "Module* " << fname << "();\n\n";
@@ -1941,14 +1952,6 @@ void CppWriter::printModule(const std::string& fname,
}
nl(Out);
- // Loop over the dependent libraries and emit them.
- Module::lib_iterator LI = TheModule->lib_begin();
- Module::lib_iterator LE = TheModule->lib_end();
- while (LI != LE) {
- Out << "mod->addLibrary(\"" << *LI << "\");";
- nl(Out);
- ++LI;
- }
printModuleBody();
nl(Out) << "return mod;";
nl(Out,-1) << "}";
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 30d765d6c9ce..477e788ee2fd 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -14,8 +14,8 @@
#ifndef CPPTARGETMACHINE_H
#define CPPTARGETMACHINE_H
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
namespace llvm {
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index a8ac0a282cd1..1ca74a4895c4 100644
--- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "CPPTargetMachine.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 306084bb8c52..b5b887e7c7c8 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -9,6 +9,8 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
add_public_tablegen_target(HexagonCommonTableGen)
+set(LLVM_COMMON_DEPENDS intrinsics_gen)
+
add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCallingConvLower.cpp
@@ -16,6 +18,7 @@ add_llvm_target(HexagonCodeGen
HexagonExpandPredSpillCode.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
+ HexagonFixupHwLoops.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
@@ -33,8 +36,6 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
)
-add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 45f857bab8c6..dfbefc864283 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -21,14 +21,16 @@
namespace llvm {
class FunctionPass;
+ class ModulePass;
class TargetMachine;
class MachineInstr;
- class MCInst;
+ class HexagonMCInst;
class HexagonAsmPrinter;
class HexagonTargetMachine;
class raw_ostream;
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
@@ -53,7 +55,7 @@ namespace llvm {
TargetAsmBackend *createHexagonAsmBackend(const Target &,
const std::string &);
*/
- void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
HexagonAsmPrinter &AP);
} // end namespace llvm;
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 451e56206e60..8a5ee40590bb 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -32,6 +32,107 @@ def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
"Hexagon v5">;
//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
+def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// PredRel - Filter class used to relate non-predicated instructions with their
+// predicated forms.
+class PredRel;
+// PredNewRel - Filter class used to relate predicated instructions with their
+// predicate-new forms.
+class PredNewRel: PredRel;
+// ImmRegRel - Filter class used to relate instructions having reg-reg form
+// with their reg-imm counterparts.
+class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// AddrModeRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate non-predicated instructions with their
+// predicated formats - true and false.
+//
+
+def getPredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ // Instructions with the same BaseOpcode, isNVStore and PNewValue form a row.
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+ // Instructions with the same predicate sense form a column.
+ let ColFields = ["PredSense"];
+ // The key column is the unpredicated instructions.
+ let KeyCol = [""];
+ // Value columns are PredSense=true and PredSense=false
+ let ValueCols = [["true"], ["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+ let FilterClass = "PredNewRel";
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let ColFields = ["PNewValue"];
+ let KeyCol = [""];
+ let ValueCols = [["new"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate store instructions with their new-value
+// format.
+//
+def getNewValueOpcode : InstrMapping {
+ let FilterClass = "NewValueRel";
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["isNVStore"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+def getBaseWithImmOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
+ "isMEMri", "isFloat"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["Absolute"];
+ let ValueCols = [["BaseImmOffset"]];
+}
+
+def getBaseWithRegOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["BaseImmOffset"];
+ let ValueCols = [["BaseRegOffset"]];
+}
+
+def getRegForm : InstrMapping {
+ let FilterClass = "ImmRegRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
include "HexagonSchedule.td"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index c15bce608f5e..88cd3fbacea0 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -17,20 +17,24 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonMCInst.h"
#include "HexagonTargetMachine.h"
#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "InstPrinter/HexagonInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -38,22 +42,18 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -220,8 +220,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
HexagonMCInst MCI;
- MCI.setStartPacket(Index == 0);
- MCI.setEndPacket(Index == (Size-1));
+ MCI.setPacketStart(Index == 0);
+ MCI.setPacketEnd(Index == (Size-1));
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
OutStreamer.EmitInstruction(MCI);
@@ -230,8 +230,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else {
HexagonMCInst MCI;
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setStartPacket(true);
- MCI.setEndPacket(true);
+ MCI.setPacketStart(true);
+ MCI.setPacketEnd(true);
}
HexagonLowerToMC(MI, MCI, *this);
OutStreamer.EmitInstruction(MCI);
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 9bca9e070709..d4078ad28b60 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -7,21 +7,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon_cfg"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 73f9d9acab26..2c93d04f98e6 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -15,12 +15,12 @@
#include "HexagonCallingConvLower.h"
#include "Hexagon.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
index 1f601e87ad68..489b3a3e5985 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -17,9 +17,9 @@
#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
//
// Need to handle varargs.
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index ae2ca378881d..08144217fd30 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -17,9 +17,10 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -30,12 +31,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
new file mode 100644
index 000000000000..240cc9566648
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -0,0 +1,183 @@
+//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// The loop start address in the LOOPn instruction is encoded as a distance
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the loop needs to be set up manually, i.e. via
+// direct transfers to SAn and LCn.
+// This pass will identify and convert such LOOPn instructions to a proper
+// form.
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+ initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// \brief Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Add the instruction to set the LC and SA registers explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of each basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Maps each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// sets the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+ .addReg(Scratch);
+}
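+
+// Schematically, a too-distant "loop0(.LBB, Rt)" is replaced by:
+//   lc0 = Rt                  // trip count into LC0 (TFCR)
+//   Rs  = ##.LBB              // CONST32_Label into a scavenged register
+//   sa0 = Rs                  // loop start address into SA0 (TFCR)
+// with an extra "Rs = #imm" (TFRI) first when the count is an immediate.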
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index cd682df7a574..d6a9329cd407 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -11,28 +11,28 @@
#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -166,7 +166,8 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;}
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
@@ -326,6 +327,21 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+void HexagonFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
return MF.getFrameInfo()->getObjectOffset(FI);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index ad87f11e2457..a62c76aaf676 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -35,6 +35,11 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
virtual bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index d756aec9bef9..178662447a7f 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,89 +27,202 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hwloops"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
#include <algorithm>
+#include <vector>
using namespace llvm;
+#ifndef NDEBUG
+static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+#endif
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+namespace llvm {
+ void initializeHexagonHardwareLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct HexagonHardwareLoops : public MachineFunctionPass {
- MachineLoopInfo *MLI;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ const HexagonTargetMachine *TM;
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+#ifndef NDEBUG
+ static int Counter;
+#endif
public:
- static char ID; // Pass identification, replacement for typeid
+ static char ID;
- HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {
+ initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Hexagon Hardware Loops"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable.
- /// Should be defined in MachineLoop. Based upon version in class Loop.
- const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
-
- /// getTripCount - Return a loop-invariant LLVM register indicating the
- /// number of times the loop will be executed. If the trip-count cannot
- /// be determined, this return null.
- CountValue *getTripCount(MachineLoop *L) const;
-
- /// isInductionOperation - Return true if the instruction matches the
- /// pattern for an opertion that defines an induction variable.
- bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+ /// Kinds of comparisons in the compare instructions.
+ struct Comparison {
+ enum Kind {
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04, // Less-than property.
+ G = 0x08, // Greater-than property.
+ U = 0x40, // Unsigned property.
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static Kind getSwappedComparison(Kind Cmp) {
+ assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)(Cmp ^ (L|G));
+ return Cmp;
+ }
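+
+ // For example, LEu is encoded as L|EQ|U == 0x45, and swapping the
+ // operands of "a < b" yields "b > a", so getSwappedComparison(LTs)
+ // returns GTs while the EQ, NE and U bits pass through unchanged.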
+ };
- /// isInvalidOperation - Return true if the instruction is not valid within
- /// a hardware loop.
+ /// \brief Find the register that contains the loop controlling
+ /// induction variable.
+ /// If successful, it will return true and set the \p Reg, \p IVBump
+ /// and \p IVOp arguments. Otherwise it will return false.
+ /// The returned induction register is the register R that follows the
+ /// following induction pattern:
+ /// loop:
+ /// R = phi ..., [ R.next, LatchBlock ]
+ /// R.next = R + #bump
+ /// if (R.next < #N) goto loop
+ /// IVBump is the immediate value added to R, and IVOp is the instruction
+ /// "R.next = R + #bump".
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg,
+ int64_t &IVBump, MachineInstr *&IVOp) const;
+
+ /// \brief Analyze the statements in a loop to determine if the loop
+ /// has a computable trip count and, if so, return a value that represents
+ /// the trip count expression.
+ CountValue *getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts);
+
+ /// \brief Return the expression that represents the number of times
+ /// a loop iterates. The function takes the operands that represent the
+ /// loop start value, loop end value, and induction value. Based upon
+ /// these operands, the function attempts to compute the trip count.
+ /// If the trip count is not directly available (as an immediate value,
+ /// or a register), the function will attempt to insert computation of it
+ /// to the loop's preheader.
+ CountValue *computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const;
+
+ /// \brief Return true if the instruction is not valid within a hardware
+ /// loop.
bool isInvalidLoopOperation(const MachineInstr *MI) const;
- /// containsInavlidInstruction - Return true if the loop contains an
- /// instruction that inhibits using the hardware loop.
+ /// \brief Return true if the loop contains an instruction that inhibits
+ /// using the hardware loop.
bool containsInvalidInstruction(MachineLoop *L) const;
- /// converToHardwareLoop - Given a loop, check if we can convert it to a
- /// hardware loop. If so, then perform the conversion and return true.
+ /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// If so, then perform the conversion and return true.
bool convertToHardwareLoop(MachineLoop *L);
+ /// \brief Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const;
+
+ /// \brief Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+
+ /// \brief Make sure that the "bump" instruction executes before the
+ /// compare. We need that for the IV fixup, so that the compare
+ /// instruction would not use a bumped value that has not yet been
+ /// defined. If the instructions are out of order, try to reorder them.
+ bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+ /// \brief Get the instruction that loads an immediate value into \p R,
+ /// or 0 if such an instruction does not exist.
+ MachineInstr *defWithImmediate(unsigned R);
+
+ /// \brief Get the immediate value referred to by \p MO, either for
+ /// immediate operands, or for register operands, where the register
+ /// was defined with an immediate value.
+ int64_t getImmediate(MachineOperand &MO);
+
+ /// \brief Reset the given machine operand to now refer to a new immediate
+ /// value. Assumes that the operand was already referencing an immediate
+ /// value, either directly, or via a register.
+ void setImmediate(MachineOperand &MO, int64_t Val);
+
+ /// \brief Fix the data flow of the induction variable.
+ /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+ ///                                     |
+ ///                                     +-> back to phi
+ /// where "bump" is the increment of the induction variable:
+ /// iv = iv + #const.
+ /// Due to some prior code transformations, the actual flow may look
+ /// like this:
+ ///   phi -+-> bump ---> back to phi
+ ///        |
+ ///        +-> comparison-in-latch (against upper_bound-bump),
+ /// i.e. the comparison that controls the loop execution may be using
+ /// the value of the induction variable from before the increment.
+ ///
+ /// Return true if the loop's flow is the desired one (i.e. it's
+ /// either been fixed, or no fixing was necessary).
+ /// Otherwise, return false. This can happen if the induction variable
+ /// couldn't be identified, or if the value in the latch's comparison
+ /// cannot be adjusted to reflect the post-bump value.
+ bool fixupInductionVariable(MachineLoop *L);
+
+ /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Return the block that is the preheader.
+ MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
};
char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+ int HexagonHardwareLoops::Counter = 0;
+#endif
-
- // CountValue class - Abstraction for a trip count of a loop. A
- // smaller vesrsion of the MachineOperand class without the concerns
- // of changing the operand representation.
+ /// \brief Abstraction for a trip count of a loop. A smaller version
+ /// of the MachineOperand class without the concerns of changing the
+ /// operand representation.
class CountValue {
public:
enum CountValueType {
@@ -119,101 +232,62 @@ namespace {
private:
CountValueType Kind;
union Values {
- unsigned RegNum;
- int64_t ImmVal;
- Values(unsigned r) : RegNum(r) {}
- Values(int64_t i) : ImmVal(i) {}
+ struct {
+ unsigned Reg;
+ unsigned Sub;
+ } R;
+ unsigned ImmVal;
} Contents;
- bool isNegative;
public:
- CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
- isNegative(neg) {}
- explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
- isNegative(i < 0) {}
- CountValueType getType() const { return Kind; }
+ explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+ Kind = t;
+ if (Kind == CV_Register) {
+ Contents.R.Reg = v;
+ Contents.R.Sub = u;
+ } else {
+ Contents.ImmVal = v;
+ }
+ }
bool isReg() const { return Kind == CV_Register; }
bool isImm() const { return Kind == CV_Immediate; }
- bool isNeg() const { return isNegative; }
unsigned getReg() const {
assert(isReg() && "Wrong CountValue accessor");
- return Contents.RegNum;
+ return Contents.R.Reg;
}
- void setReg(unsigned Val) {
- Contents.RegNum = Val;
+ unsigned getSubReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Sub;
}
- int64_t getImm() const {
+ unsigned getImm() const {
assert(isImm() && "Wrong CountValue accessor");
- if (isNegative) {
- return -Contents.ImmVal;
- }
return Contents.ImmVal;
}
- void setImm(int64_t Val) {
- Contents.ImmVal = Val;
- }
void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
- if (isReg()) { OS << PrintReg(getReg()); }
- if (isImm()) { OS << getImm(); }
- }
- };
-
- struct HexagonFixupHwLoops : public MachineFunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
-
- HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+ if (isImm()) { OS << Contents.ImmVal; }
}
-
- private:
- /// Maximum distance between the loop instr and the basic block.
- /// Just an estimate.
- static const unsigned MAX_LOOP_DISTANCE = 200;
-
- /// fixupLoopInstrs - Check the offset between each loop instruction and
- /// the loop basic block to determine if we can use the LOOP instruction
- /// or if we need to set the LC/SA registers explicitly.
- bool fixupLoopInstrs(MachineFunction &MF);
-
- /// convertLoopInstr - Add the instruction to set the LC and SA registers
- /// explicitly.
- void convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS);
-
};
+} // end anonymous namespace
- char HexagonFixupHwLoops::ID = 0;
-} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
-/// isHardwareLoop - Returns true if the instruction is a hardware loop
-/// instruction.
+/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::LOOP0_r ||
MI->getOpcode() == Hexagon::LOOP0_i;
}
-/// isCompareEquals - Returns true if the instruction is a compare equals
-/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::CMPEQri;
-}
-
-
-/// createHexagonHardwareLoops - Factory for creating
-/// the hardware loop phase.
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
@@ -224,45 +298,149 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- // get the loop information
MLI = &getAnalysis<MachineLoopInfo>();
- // get the register information
MRI = &MF.getRegInfo();
- // the target specific instructio info.
- TII = MF.getTarget().getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
+ TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
MachineLoop *L = *I;
- if (!L->getParentLoop()) {
+ if (!L->getParentLoop())
Changed |= convertToHardwareLoop(L);
- }
}
return Changed;
}
-/// getCanonicalInductionVariable - Check to see if the loop has a canonical
-/// induction variable. We check for a simple recurrence pattern - an
-/// integer recurrence that decrements by one each time through the loop and
-/// ends at zero. If so, return the phi node that corresponds to it.
-///
-/// Based upon the similar code in LoopInfo except this code is specific to
-/// the machine.
-/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+ unsigned &Reg,
+ int64_t &IVBump,
+ MachineInstr *&IVOp
+ ) const {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // This pair represents an induction register together with an immediate
+ // value that will be added to it in each loop iteration.
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+
+ // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+ // from an induction operation
+ // R.next = R + bump
+ // where bump is an immediate value.
+ typedef std::map<unsigned,RegisterBump> InductionMap;
+
+ InductionMap IndMap;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction. Get the operand that corresponds to the
+ // latch block, and see if it is a result of an addition of form "reg+imm",
+ // where the "reg" is defined by the PHI node we are looking at.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add is the PHI we're
+ // looking at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ unsigned CSz = Cond.size();
+ assert (CSz == 1 || CSz == 2);
+ unsigned PredR = Cond[CSz-1].getReg();
+
+ MachineInstr *PredI = MRI->getVRegDef(PredR);
+ if (!PredI->isCompare())
+ return false;
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpImm = 0, CmpMask = 0;
+ bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+ CmpMask, CmpImm);
+ // Fail if the compare was not analyzed, or it's not comparing a register
+ // with an immediate value. Not checking the mask here, since we handle
+ // the individual compare opcodes (including CMPb) later on.
+ if (!CmpAnalyzed)
+ return false;
+
+ // Exactly one of the input registers to the comparison should be among
+ // the induction registers.
+ InductionMap::iterator IndMapEnd = IndMap.end();
+ InductionMap::iterator F = IndMapEnd;
+ if (CmpReg1 != 0) {
+ InductionMap::iterator F1 = IndMap.find(CmpReg1);
+ if (F1 != IndMapEnd)
+ F = F1;
+ }
+ if (CmpReg2 != 0) {
+ InductionMap::iterator F2 = IndMap.find(CmpReg2);
+ if (F2 != IndMapEnd) {
+ if (F != IndMapEnd)
+ return false;
+ F = F2;
+ }
+ }
+ if (F == IndMapEnd)
+ return false;
+
+ Reg = F->second.first;
+ IVBump = F->second.second;
+ IVOp = MRI->getVRegDef(F->first);
+ return true;
+}
+
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
///
-const MachineInstr
-*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of times the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts) {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
"Loop must have more than one incoming edge!");
MachineBasicBlock *Backedge = *PI++;
- if (PI == TopMBB->pred_end()) return 0; // dead loop
+ if (PI == TopMBB->pred_end()) // dead loop?
+ return 0;
MachineBasicBlock *Incoming = *PI++;
- if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+ if (PI != TopMBB->pred_end()) // multiple backedges?
+ return 0;
- // make sure there is one incoming and one backedge and determine which
+ // Make sure there is one incoming and one backedge and determine which
// is which.
if (L->contains(Incoming)) {
if (L->contains(Backedge))
@@ -271,139 +449,433 @@ const MachineInstr
} else if (!L->contains(Backedge))
return 0;
- // Loop over all of the PHI nodes, looking for a canonical induction variable:
- // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
- // - The recurrence comes from the backedge.
- // - the definition is an induction operatio.n
- for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
- I != E && I->isPHI(); ++I) {
- const MachineInstr *MPhi = &*I;
- unsigned DefReg = MPhi->getOperand(0).getReg();
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- // Check each operand for the value from the backedge.
- MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
- if (L->contains(MBB)) { // operands comes from the backedge
- // Check if the definition is an induction operation.
- const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
- if (isInductionOperation(DI, DefReg)) {
- return MPhi;
- }
- }
+ // Look for the cmp instruction to determine if we can get a useful trip
+ // count. The trip count can be either a register or an immediate. The
+ // location of the value depends upon the type (reg or imm).
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return 0;
+
+ unsigned IVReg = 0;
+ int64_t IVBump = 0;
+ MachineInstr *IVOp;
+ bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
+ if (!FoundIV)
+ return 0;
+
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+
+ MachineOperand *InitialValue = 0;
+ MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
+ MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
+ if (MBB == Preheader)
+ InitialValue = &IV_Phi->getOperand(i);
+ else if (MBB == Latch)
+ IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
+ }
+ if (!InitialValue)
+ return 0;
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return 0;
+
+ MachineBasicBlock *Header = L->getHeader();
+ // TB must be non-null. If FB is also non-null, one of them must be
+ // the header. Otherwise, the branch to TB could be exiting the loop, and
+ // the fall-through can go to the header.
+ assert (TB && "Latch block without a branch?");
+ assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
+ if (!TB || (FB && TB != Header && FB != Header))
+ return 0;
+
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // to put imm(0), followed by P in the vector Cond.
+ // If TB is not the header, it means that the "not-taken" path must lead
+ // to the header.
+ bool Negated = (Cond.size() > 1) ^ (TB != Header);
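+ // E.g. "if (P) jump header" yields Cond = {P} with TB == Header, and
+ // "if (!P) jump exit" yields Cond = {imm(0), P} with TB != Header;
+ // in both cases the loop runs while P holds, so Negated is false.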
+ unsigned PredReg = Cond[Cond.size()-1].getReg();
+ MachineInstr *CondI = MRI->getVRegDef(PredReg);
+ unsigned CondOpc = CondI->getOpcode();
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int Mask = 0, ImmValue = 0;
+ bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
+ Mask, ImmValue);
+ if (!AnalyzedCmp)
+ return 0;
+
+ // The comparison operator type determines how we compute the loop
+ // trip count.
+ OldInsts.push_back(CondI);
+ OldInsts.push_back(IVOp);
+
+ // Sadly, the following code gets information based on the position
+ // of the operands in the compare instruction. This has to be done
+ // this way, because the comparisons check for a specific relationship
+ // between the operands (e.g. is-less-than), rather than finding out
+ // what relationship the operands are in (as on PPC).
+ Comparison::Kind Cmp;
+ bool isSwapped = false;
+ const MachineOperand &Op1 = CondI->getOperand(1);
+ const MachineOperand &Op2 = CondI->getOperand(2);
+ const MachineOperand *EndValue = 0;
+
+ if (Op1.isReg()) {
+ if (Op2.isImm() || Op1.getReg() == IVReg)
+ EndValue = &Op2;
+ else {
+ EndValue = &Op1;
+ isSwapped = true;
}
}
- return 0;
-}
-/// getTripCount - Return a loop-invariant LLVM value indicating the
-/// number of times the loop will be executed. The trip count can
-/// be either a register or a constant value. If the trip-count
-/// cannot be determined, this returns null.
-///
-/// We find the trip count from the phi instruction that defines the
-/// induction variable. We follow the links to the CMP instruction
-/// to get the trip count.
-///
-/// Based upon getTripCount in LoopInfo.
-///
-CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
- // Check that the loop has a induction variable.
- const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
- if (IV_Inst == 0) return 0;
-
- // Canonical loops will end with a 'cmpeq_ri IV, Imm',
- // if Imm is 0, get the count from the PHI opnd
- // if Imm is -M, than M is the count
- // Otherwise, Imm is the count
- const MachineOperand *IV_Opnd;
- const MachineOperand *InitialValue;
- if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
- InitialValue = &IV_Inst->getOperand(1);
- IV_Opnd = &IV_Inst->getOperand(3);
- } else {
- InitialValue = &IV_Inst->getOperand(3);
- IV_Opnd = &IV_Inst->getOperand(1);
- }
-
- // Look for the cmp instruction to determine if we
- // can get a useful trip count. The trip count can
- // be either a register or an immediate. The location
- // of the value depends upon the type (reg or imm).
- for (MachineRegisterInfo::reg_iterator
- RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
- RI != RE; ++RI) {
- IV_Opnd = &RI.getOperand();
- const MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI)) {
- const MachineOperand &MO = MI->getOperand(2);
- assert(MO.isImm() && "IV Cmp Operand should be 0");
- int64_t ImmVal = MO.getImm();
-
- const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
- assert(L->contains(IV_DefInstr->getParent()) &&
- "IV definition should occurs in loop");
- int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
-
- if (ImmVal == 0) {
- // Make sure the induction variable changes by one on each iteration.
- if (iv_value != 1 && iv_value != -1) {
+ if (!EndValue)
+ return 0;
+
+ switch (CondOpc) {
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
+ case Hexagon::CMPLTrr:
+ Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
+ break;
+ case Hexagon::CMPLTUrr:
+ Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
+ break;
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
+ break;
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
+ break;
+ // Very limited support for byte/halfword compares.
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPhEQri_V4: {
+ if (IVBump != 1)
+ return 0;
+
+ int64_t InitV, EndV;
+ // Since the comparisons are "ri", the EndValue should be an
+ // immediate. Check it just in case.
+ assert(EndValue->isImm() && "Unrecognized latch comparison");
+ EndV = EndValue->getImm();
+ // Allow InitialValue to be a register defined with an immediate.
+ if (InitialValue->isReg()) {
+ if (!defWithImmediate(InitialValue->getReg()))
return 0;
- }
- return new CountValue(InitialValue->getReg(), iv_value > 0);
+ InitV = getImmediate(*InitialValue);
} else {
- assert(InitialValue->isReg() && "Expecting register for init value");
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
- if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
- int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
- if ((count % iv_value) != 0) {
- return 0;
- }
- return new CountValue(count/iv_value);
- }
+ assert(InitialValue->isImm());
+ InitV = InitialValue->getImm();
+ }
+ if (InitV >= EndV)
+ return 0;
+ if (CondOpc == Hexagon::CMPbEQri_V4) {
+ if (!isInt<8>(InitV) || !isInt<8>(EndV))
+ return 0;
+ } else { // Hexagon::CMPhEQri_V4
+ if (!isInt<16>(InitV) || !isInt<16>(EndV))
+ return 0;
}
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
}
+ default:
+ return 0;
}
- return 0;
+
+ if (isSwapped)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ if (InitialValue->isReg()) {
+ unsigned R = InitialValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+ if (EndValue->isReg()) {
+ unsigned R = EndValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ }
+
+ return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
}
-/// isInductionOperation - return true if the operation is matches the
-/// pattern that defines an induction variable:
-/// add iv, c
-///
-bool
-HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
- unsigned IVReg) const {
- return (MI->getOpcode() ==
- Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+/// \brief Helper function that returns the expression that represents the
+/// number of times a loop iterates. The function takes the operands that
+/// represent the loop start value, loop end value, and induction value.
+/// Based upon these operands, the function attempts to compute the trip count.
+CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const {
+ // Cannot handle comparison EQ, i.e. while (A == B).
+ if (Cmp == Comparison::EQ)
+ return 0;
+
+ // Check if either the start or end value is an assignment of an immediate.
+ // If so, use the immediate value rather than the register.
+ if (Start->isReg()) {
+ const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+ if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+ Start = &StartValInstr->getOperand(1);
+ }
+ if (End->isReg()) {
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+ End = &EndValInstr->getOperand(1);
+ }
+
+ assert (Start->isReg() || Start->isImm());
+ assert (End->isReg() || End->isImm());
+
+ bool CmpLess = Cmp & Comparison::L;
+ bool CmpGreater = Cmp & Comparison::G;
+ bool CmpHasEqual = Cmp & Comparison::EQ;
+
+ // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
+ // If loop executes while iv is "less" with the iv value going down, then
+ // the iv must wrap.
+ if (CmpLess && IVBump < 0)
+ return 0;
+ // If loop executes while iv is "greater" with the iv value going up, then
+ // the iv must wrap.
+ if (CmpGreater && IVBump > 0)
+ return 0;
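+ // E.g. "for (i = 1; i > 0; i += 1)" terminates only once i wraps around,
+ // so no meaningful trip count can be computed for it.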
+
+ if (Start->isImm() && End->isImm()) {
+ // Both start and end are immediates.
+ int64_t StartV = Start->getImm();
+ int64_t EndV = End->getImm();
+ int64_t Dist = EndV - StartV;
+ if (Dist == 0)
+ return 0;
+
+ bool Exact = (Dist % IVBump) == 0;
+
+ if (Cmp == Comparison::NE) {
+ if (!Exact)
+ return 0;
+ if ((Dist < 0) ^ (IVBump < 0))
+ return 0;
+ }
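+ // E.g. "for (i = 0; i != 5; i += 2)" steps through 0, 2, 4, 6, ...
+ // and never reaches 5, hence the exact-divisibility requirement for NE.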
+
+ // For comparisons that include the final value (i.e. include equality
+ // with the final value), we need to increase the distance by 1.
+ if (CmpHasEqual)
+ Dist = Dist > 0 ? Dist+1 : Dist-1;
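+ // E.g. "for (i = 0; i <= 10; ++i)" runs 11 times, so the raw distance
+ // of 10 is widened to 11 before dividing by the bump.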
+
+ // assert (CmpLess => Dist > 0);
+ assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
+ // assert (CmpGreater => Dist < 0);
+ assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+
+ // "Normalized" distance, i.e. with the bump set to +-1.
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
+ : (-Dist + (-IVBump-1)) / (-IVBump);
+ assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
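+ // E.g. Dist == 10 with IVBump == 3 gives Dist1 == (10 + 2) / 3 == 4,
+ // matching the four iterations at iv == 0, 3, 6 and 9.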
+
+ uint64_t Count = Dist1;
+
+ if (Count > 0xFFFFFFFFULL)
+ return 0;
+
+ return new CountValue(CountValue::CV_Immediate, Count);
+ }
+
+ // A general case: Start and End are some values, but the actual
+ // iteration count may not be available. If it is not, insert
+ // a computation of it into the preheader.
+
+ // If the induction variable bump is not a power of 2, quit.
+ // Otherwise we'd need a general integer division.
+ if (!isPowerOf2_64(abs64(IVBump)))
+ return 0;
+
+ MachineBasicBlock *PH = Loop->getLoopPreheader();
+ assert (PH && "Should have a preheader by now");
+ MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+ DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
+ : DebugLoc();
+
+ // If Start is an immediate and End is a register, the trip count
+ // will be "reg - imm". Hexagon's "subtract immediate" instruction
+ // is actually "reg + -imm".
+
+ // If the loop IV is going downwards, i.e. if the bump is negative,
+ // then the iteration count (computed as End-Start) will need to be
+ // negated. To avoid the negation, just swap Start and End.
+ if (IVBump < 0) {
+ std::swap(Start, End);
+ IVBump = -IVBump;
+ }
+ // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
+ // Signedness and "including equality" are preserved.
+
+ bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
+ bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
+
+ int64_t StartV = 0, EndV = 0;
+ if (Start->isImm())
+ StartV = Start->getImm();
+ if (End->isImm())
+ EndV = End->getImm();
+
+ int64_t AdjV = 0;
+ // To compute the iteration count, we would need this computation:
+ // Count = (End - Start + (IVBump-1)) / IVBump
+ // or, when CmpHasEqual:
+ // Count = (End - Start + (IVBump-1)+1) / IVBump
+ // The "IVBump-1" part is the adjustment (AdjV). We can avoid
+ // generating an instruction specifically to add it if we can adjust
+ // the immediate values for Start or End.
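+ // E.g. for "i = 0; i < n; i += 4" the folding below turns StartV into
+ // -3, so the count becomes (n + 3) >> 2 with no extra add emitted.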
+
+ if (CmpHasEqual) {
+ // Need to add 1 to the total iteration count.
+ if (Start->isImm())
+ StartV--;
+ else if (End->isImm())
+ EndV++;
+ else
+ AdjV += 1;
+ }
+
+ if (Cmp != Comparison::NE) {
+ if (Start->isImm())
+ StartV -= (IVBump-1);
+ else if (End->isImm())
+ EndV += (IVBump-1);
+ else
+ AdjV += (IVBump-1);
+ }
+
+ unsigned R = 0, SR = 0;
+ if (Start->isReg()) {
+ R = Start->getReg();
+ SR = Start->getSubReg();
+ } else {
+ R = End->getReg();
+ SR = End->getSubReg();
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ // Hardware loops cannot handle 64-bit registers. If it's a double
+ // register, it has to have a subregister.
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass)
+ return 0;
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+
+ // Compute DistR (register with the distance between Start and End).
+ unsigned DistR, DistSR;
+
+ // Special case: a start value of imm(0) makes End itself the distance.
+ if (Start->isImm() && StartV == 0) {
+ DistR = End->getReg();
+ DistSR = End->getSubReg();
+ } else {
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
+ (RegToImm ? TII->get(Hexagon::SUB_ri) :
+ TII->get(Hexagon::ADD_ri));
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg) {
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else if (RegToImm) {
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else { // ImmToReg
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ }
+ DistR = SubR;
+ DistSR = 0;
+ }
+
+ // From DistR, compute AdjR (register with the adjusted distance).
+ unsigned AdjR, AdjSR;
+
+ if (AdjV == 0) {
+ AdjR = DistR;
+ AdjSR = DistSR;
+ } else {
+ // Generate CountR = ADD DistR, AdjVal
+ unsigned AddR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+ BuildMI(*PH, InsertPos, DL, AddD, AddR)
+ .addReg(DistR, 0, DistSR)
+ .addImm(AdjV);
+
+ AdjR = AddR;
+ AdjSR = 0;
+ }
+
+ // From AdjR, compute CountR (register with the final count).
+ unsigned CountR, CountSR;
+
+ if (IVBump == 1) {
+ CountR = AdjR;
+ CountSR = AdjSR;
+ } else {
+ // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
+ unsigned Shift = Log2_32(IVBump);
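+ // E.g. an IV bump of 8 becomes a right shift by 3 instead of a divide.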
+
+ // Generate LsrR = LSR AdjR, Shift.
+ unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
+ BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
+ .addReg(AdjR, 0, AdjSR)
+ .addImm(Shift);
+
+ CountR = LsrR;
+ CountSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
-/// isInvalidOperation - Return true if the operation is invalid within
-/// hardware loop.
-bool
-HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+/// \brief Return true if the operation is invalid within hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(
+ const MachineInstr *MI) const {
// call is not allowed because the callee may use a hardware loop
- if (MI->getDesc().isCall()) {
+ if (MI->getDesc().isCall())
return true;
- }
+
// do not allow nested hardware loops
- if (isHardwareLoop(MI)) {
+ if (isHardwareLoop(MI))
return true;
- }
+
// check if the instruction defines a hardware loop register
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() &&
- (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
- MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
+ R == Hexagon::SA0 || R == Hexagon::SA1)
return true;
- }
}
return false;
}
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the hardware loop function.
-///
+
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -411,126 +883,258 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI)) {
+ if (isInvalidLoopOperation(MI))
return true;
- }
}
}
return false;
}
-/// converToHardwareLoop - check if the loop is a candidate for
-/// converting to a hardware loop. If so, then perform the
-/// transformation.
+
+/// \brief Returns true if the instruction is dead. This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (MRI->use_nodbg_empty(Reg))
+ continue;
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+ // this instruction is dead: both it and the phi node can be removed.
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+ use_nodbg_iterator End = MRI->use_nodbg_end();
+ if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
+ return false;
+
+ MachineInstr *OnePhi = I.getOperand().getParent();
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (!OPO.isReg() || !OPO.isDef())
+ continue;
+
+ unsigned OPReg = OPO.getReg();
+ use_nodbg_iterator nextJ;
+ for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
+ J != End; J = nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand &Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ // If the phi node has a user that is not MI, bail...
+ if (MI != UseMI)
+ return false;
+ }
+ }
+ DeadPhis.push_back(OnePhi);
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim.
+
+ SmallVector<MachineInstr*, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "HW looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I != E; I = nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI == MI)
+ continue;
+ if (Use.isDebug())
+ UseMI->getOperand(0).setReg(0U);
+ // This may also be an "instr -> phi -> instr" case, which can
+ // be removed too.
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i)
+ DeadPhis[i]->eraseFromParent();
+ }
+}
+
+/// \brief Check if the loop is a candidate for converting to a hardware
+/// loop. If so, then perform the transformation.
///
-/// This function works on innermost loops first. A loop can
-/// be converted if it is a counting loop; either a register
-/// value or an immediate.
+/// This function works on innermost loops first. A loop can be converted
+/// if it is a counting loop; either a register value or an immediate.
///
-/// The code makes several assumptions about the representation
-/// of the loop in llvm.
+/// The code makes several assumptions about the representation of the loop
+/// in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ // This is just for sanity.
+ assert(L->getHeader() && "Loop without a header?");
+
bool Changed = false;
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
Changed |= convertToHardwareLoop(*I);
- }
+
// If a nested loop has been converted, then we can't convert this loop.
- if (Changed) {
+ if (Changed)
return Changed;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = HWLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= HWLoopLimit)
+ return false;
+ Counter++;
}
- // Are we able to determine the trip count for the loop?
- CountValue *TripCount = getTripCount(L);
- if (TripCount == 0) {
- return false;
- }
+#endif
+
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L)) {
+ if (containsInvalidInstruction(L))
return false;
- }
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- // No preheader means there's not place for the loop instr.
- if (Preheader == 0) {
+
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
return false;
- }
- MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
MachineBasicBlock *LastMBB = L->getExitingBlock();
// Don't generate hw loop if the loop has more than one exit.
- if (LastMBB == 0) {
+ if (LastMBB == 0)
return false;
- }
+
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI == LastMBB->end())
+ return false;
+
+ // Ensure the loop has a preheader: the loop instruction will be
+ // placed there.
+ bool NewPreheader = false;
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = createPreheaderForLoop(L);
+ if (!Preheader)
+ return false;
+ NewPreheader = true;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ SmallVector<MachineInstr*, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getLoopTripCount(L, OldInsts);
+ if (TripCount == 0)
+ return false;
+
+ // Is the trip count available in the preheader?
+ if (TripCount->isReg()) {
+ // There will be a use of the register inserted into the preheader,
+ // so make sure that the register is actually defined at that point.
+ MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
+ MachineBasicBlock *BBDef = TCDef->getParent();
+ if (!NewPreheader) {
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
+ } else {
+ // If we have just created a preheader, the dominator tree won't be
+ // aware of it. Check if the definition of the register dominates
+ // the header, but is not the header itself.
+ if (!MDT->properlyDominates(BBDef, L->getHeader()))
+ return false;
+ }
+ }
// Determine the loop start.
MachineBasicBlock *LoopStart = L->getTopBlock();
if (L->getLoopLatch() != LastMBB) {
// When the exit and latch are not the same, use the latch block as the
// start.
- // The loop start address is used only after the 1st iteration, and the loop
- // latch may contains instrs. that need to be executed after the 1st iter.
+ // The loop start address is used only after the 1st iteration, and the
+ // loop latch may contain instructions that need to be executed after the
+ // first iteration.
LoopStart = L->getLoopLatch();
// Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart)) {
+ if (!LastMBB->isSuccessor(LoopStart))
return false;
- }
}
- // Convert the loop to a hardware loop
+ // Convert the loop to a hardware loop.
DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+ DebugLoc DL;
+ if (InsertPos != Preheader->end())
+ DL = InsertPos->getDebugLoc();
if (TripCount->isReg()) {
// Create a copy of the loop count register.
- MachineFunction *MF = LastMBB->getParent();
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(TripCount->getReg());
- unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
- if (TripCount->isNeg()) {
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
- }
-
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
+ .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart)
+ .addReg(CountReg);
} else {
- assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
- // Add the Loop immediate instruction to the beginning of the loop.
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop,
+ // if the immediate fits in the instruction. Otherwise, we need to
+ // create a new virtual register.
int64_t CountImm = TripCount->getImm();
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
+ .addImm(CountImm);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart).addReg(CountReg);
+ } else
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
+ .addMBB(LoopStart).addImm(CountImm);
}
- // Make sure the loop start always has a reference in the CFG. We need to
- // create a BlockAddress operand to get this mechanism to work both the
+ // Make sure the loop start always has a reference in the CFG. We need
+ // to create a BlockAddress operand to get this mechanism to work; both the
// MachineBasicBlock and BasicBlock objects need the flag set.
LoopStart->setHasAddressTaken();
// This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object
+ // object.
BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
// Replace the loop branch with an endloop instruction.
- DebugLoc dl = LastI->getDebugLoc();
- BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+ DebugLoc LastIDL = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, LastIDL,
+ TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
// - a conditional branch to the loop start.
if (LastI->getOpcode() == Hexagon::JMP_c ||
LastI->getOpcode() == Hexagon::JMP_cNot) {
- // delete one and change/add an uncond. branch to out of the loop
+ // Delete one and change/add an uncond. branch to out of the loop.
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
LastI = LastMBB->erase(LastI);
if (!L->contains(BranchTarget)) {
- if (LastI != LastMBB->end()) {
- TII->RemoveBranch(*LastMBB);
- }
+ if (LastI != LastMBB->end())
+ LastI = LastMBB->erase(LastI);
SmallVector<MachineOperand, 0> Cond;
- TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
}
} else {
// Conditional branch to loop start; just delete it.
@@ -538,110 +1142,413 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
}
delete TripCount;
+ // The induction operation and the comparison may now be
+ // unneeded. If so, remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
++NumHWLoops;
return true;
}
-/// createHexagonFixupHwLoops - Factory for creating the hardware loop
-/// phase.
-FunctionPass *llvm::createHexagonFixupHwLoops() {
- return new HexagonFixupHwLoops();
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+ MachineInstr *CmpI) {
+ assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+ MachineBasicBlock *BB = BumpI->getParent();
+ if (CmpI->getParent() != BB)
+ return false;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ // Check if things are in order to begin with.
+ for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ if (&*I == CmpI)
+ return true;
+
+ // Out of order.
+ unsigned PredR = CmpI->getOperand(0).getReg();
+ bool FoundBump = false;
+ instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
+ for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+ MachineInstr *In = &*I;
+ for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = In->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ if (MO.getReg() == PredR) // Found an intervening use of PredR.
+ return false;
+ }
+ }
+
+ if (In == BumpI) {
+ instr_iterator After = BumpI;
+ instr_iterator From = CmpI;
+ BB->splice(llvm::next(After), BB, From);
+ FoundBump = true;
+ break;
+ }
+ }
+ assert (FoundBump && "Cannot determine instruction order");
+ return FoundBump;
}
-bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
- bool Changed = fixupLoopInstrs(MF);
- return Changed;
+MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+ MachineInstr *DI = MRI->getVRegDef(R);
+ unsigned DOpc = DI->getOpcode();
+ switch (DOpc) {
+ case Hexagon::TFRI:
+ case Hexagon::TFRI64:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return DI;
+ }
+ return 0;
}
-/// fixupLoopInsts - For Hexagon, if the loop label is to far from the
-/// loop instruction then we need to set the LC0 and SA0 registers
-/// explicitly instead of using LOOP(start,count). This function
-/// checks the distance, and generates register assignments if needed.
-///
-/// This function makes two passes over the basic blocks. The first
-/// pass computes the offset of the basic block from the start.
-/// The second pass checks all the loop instructions.
-bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
-
- // Offset of the current instruction from the start.
- unsigned InstOffset = 0;
- // Map for each basic block to it's first instruction.
- DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
-
- // First pass - compute the offset of each basic block.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- BlockToInstOffset[MBB] = InstOffset;
- InstOffset += (MBB->size() * 4);
- }
-
- // Second pass - check each loop instruction to see if it needs to
- // be converted.
- InstOffset = 0;
- bool Changed = false;
- RegScavenger RS;
-
- // Loop over all the basic blocks.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- InstOffset = BlockToInstOffset[MBB];
- RS.enterBasicBlock(MBB);
-
- // Loop over all the instructions.
- MachineBasicBlock::iterator MIE = MBB->end();
- MachineBasicBlock::iterator MII = MBB->begin();
- while (MII != MIE) {
- if (isHardwareLoop(MII)) {
- RS.forward(MII);
- assert(MII->getOperand(0).isMBB() &&
- "Expect a basic block as loop operand");
- int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- diff = (diff > 0 ? diff : -diff);
- if ((unsigned)diff > MAX_LOOP_DISTANCE) {
- // Convert to explicity setting LC0 and SA0.
- convertLoopInstr(MF, MII, RS);
- MII = MBB->erase(MII);
- Changed = true;
- } else {
- ++MII;
+
+int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ assert(DI && "Need an immediate operand");
+ // All currently supported "define-with-immediate" instructions have the
+  // actual immediate value in operand(1).
+ int64_t v = DI->getOperand(1).getImm();
+ return v;
+}
+
+
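+/// \brief Set the immediate value associated with \p MO to \p Val. If \p MO
+/// is a register, update its immediate-defining instruction in place when
+/// the register has a single use; otherwise clone the definition into a new
+/// virtual register and redirect \p MO to it.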
+void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
+ if (MO.isImm()) {
+ MO.setImm(Val);
+ return;
+ }
+
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ if (MRI->hasOneNonDBGUse(R)) {
+ // If R has only one use, then just change its defining instruction to
+ // the new immediate value.
+ DI->getOperand(1).setImm(Val);
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ MachineBasicBlock &B = *DI->getParent();
+ DebugLoc DL = DI->getDebugLoc();
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
+ .addImm(Val);
+ MO.setReg(NewR);
+}
+
+
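+/// \brief If the latch compare uses the pre-bump value of an induction
+/// register, rewrite it to use the post-bump (updated) register instead,
+/// adjusting the compared immediate by the bump amount.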
+bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+  // These data structures follow the same concept as the corresponding
+  // ones in findInductionRegister (where they are described in comments).
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+ typedef std::pair<unsigned,RegisterBump> RegisterInduction;
+ typedef std::set<RegisterInduction> RegisterInductionSet;
+
+ // Register candidates for induction variables, with their associated bumps.
+ RegisterInductionSet IndRegs;
+
+ // Look for induction patterns:
+ // vreg1 = PHI ..., [ latch, vreg2 ]
+ // vreg2 = ADD vreg1, imm
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add/sub is the PHI we are looking
+ // at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
- } else {
- ++MII;
}
- InstOffset += 4;
+ } // for (i)
+ } // for (instr)
+
+ if (IndRegs.empty())
+ return false;
+
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand,2> Cond;
+  // AnalyzeBranch returns true if it fails to analyze the branch.
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ // Check if the latch branch is unconditional.
+ if (Cond.empty())
+ return false;
+
+ if (TB != Header && FB != Header)
+ // The latch does not go back to the header. Not a latch we know and love.
+ return false;
+
+ // Expecting a predicate register as a condition. It won't be a hardware
+ // predicate register at this point yet, just a vreg.
+ // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // into Cond, followed by the predicate register. For non-negated branches
+ // it's just the register.
+ unsigned CSz = Cond.size();
+ if (CSz != 1 && CSz != 2)
+ return false;
+
+ unsigned P = Cond[CSz-1].getReg();
+ MachineInstr *PredDef = MRI->getVRegDef(P);
+
+ if (!PredDef->isCompare())
+ return false;
+
+ SmallSet<unsigned,2> CmpRegs;
+ MachineOperand *CmpImmOp = 0;
+
+ // Go over all operands to the compare and look for immediate and register
+ // operands. Assume that if the compare has a single register use and a
+ // single immediate operand, then the register is being compared with the
+ // immediate value.
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg()) {
+ // Skip all implicit references. In one case there was:
+ // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
+ if (MO.isImplicit())
+ continue;
+ if (MO.isUse()) {
+ unsigned R = MO.getReg();
+ if (!defWithImmediate(R)) {
+ CmpRegs.insert(MO.getReg());
+ continue;
+ }
+ // Consider the register to be the "immediate" operand.
+ if (CmpImmOp)
+ return false;
+ CmpImmOp = &MO;
+ }
+ } else if (MO.isImm()) {
+ if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
+ return false;
+ CmpImmOp = &MO;
}
}
- return Changed;
+ if (CmpRegs.empty())
+ return false;
+
+ // Check if the compared register follows the order we want. Fix if needed.
+ for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
+ I != E; ++I) {
+ // This is a success. If the register used in the comparison is one that
+ // we have identified as a bumped (updated) induction register, there is
+ // nothing to do.
+ if (CmpRegs.count(I->first))
+ return true;
+
+ // Otherwise, if the register being compared comes out of a PHI node,
+ // and has been recognized as following the induction pattern, and is
+ // compared against an immediate, we can fix it.
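+    // For example, with
+    //   vreg1 = PHI ..., [ latch, vreg2 ]
+    //   vreg2 = ADD vreg1, 1
+    // a compare "cmp(vreg1, #10)" in the latch can be rewritten as
+    // "cmp(vreg2, #11)".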
+ const RegisterBump &RB = I->second;
+ if (CmpRegs.count(RB.first)) {
+ if (!CmpImmOp)
+ return false;
+
+ int64_t CmpImm = getImmediate(*CmpImmOp);
+ int64_t V = RB.second;
+ if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
+ return false;
+ if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+ return false;
+ CmpImm += V;
+ // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
+ // scenario, i.e. an 8-bit value.
+ if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
+ return false;
+
+ // Make sure that the compare happens after the bump. Otherwise,
+ // after the fixup, the compare would use a yet-undefined register.
+ MachineInstr *BumpI = MRI->getVRegDef(I->first);
+ bool Order = orderBumpCompare(BumpI, PredDef);
+ if (!Order)
+ return false;
+
+ // Finally, fix the compare instruction.
+ setImmediate(*CmpImmOp, CmpImm);
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ MO.setReg(I->first);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
}
-/// convertLoopInstr - convert a loop instruction to a sequence of instructions
-/// that set the lc and sa register explicitly.
-void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS) {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
- MachineBasicBlock *MBB = MII->getParent();
- DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
-
- // First, set the LC0 with the trip count.
- if (MII->getOperand(1).isReg()) {
- // Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(MII->getOperand(1).getReg());
+
+/// \brief Create a preheader for a given loop.
+MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
+ MachineLoop *L) {
+ if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ return TmpPH;
+
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineFunction *MF = Header->getParent();
+ DebugLoc DL;
+
+ if (!Latch || Header->hasAddressTaken())
+ return 0;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+
+ // Verify that all existing predecessors have analyzable branches
+ // (or no branches at all).
+ typedef std::vector<MachineBasicBlock*> MBBVector;
+ MBBVector Preds(Header->pred_begin(), Header->pred_end());
+ SmallVector<MachineOperand,2> Tmp1;
+ MachineBasicBlock *TB = 0, *FB = 0;
+
+ if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ return 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return 0;
+ }
+ }
+
+ MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
+ MF->insert(Header, NewPH);
+
+ if (Header->pred_size() > 2) {
+ // Ensure that the header has only two predecessors: the preheader and
+ // the loop latch. Any additional predecessors of the header should
+ // join at the newly created preheader. Inspect all PHI nodes from the
+ // header and create appropriate corresponding PHI nodes in the preheader.
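+    // For example, a header PHI
+    //   vreg5 = PHI [ vreg1, BB1 ], [ vreg2, BB2 ], [ vreg3, Latch ]
+    // becomes
+    //   vreg6 = PHI [ vreg1, BB1 ], [ vreg2, BB2 ]      (in the preheader)
+    //   vreg5 = PHI [ vreg6, NewPH ], [ vreg3, Latch ]  (in the header)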
+
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+
+ const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
+ MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
+ NewPH->insert(NewPH->end(), NewPN);
+
+ unsigned PR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(PR);
+ unsigned NewPR = MRI->createVirtualRegister(RC);
+ NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
+
+ // Copy all non-latch operands of a header's PHI node to the newly
+ // created PHI node in the preheader.
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ unsigned PredR = PN->getOperand(i).getReg();
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB == Latch)
+ continue;
+
+ NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ NewPN->addOperand(MachineOperand::CreateMBB(PredB));
+ }
+
+ // Remove copied operands from the old PHI node and add the value
+ // coming from the preheader's PHI.
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB != Latch) {
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ }
+ PN->addOperand(MachineOperand::CreateReg(NewPR, false));
+ PN->addOperand(MachineOperand::CreateMBB(NewPH));
+ }
+
} else {
- // Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
- .addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(Scratch);
- }
- // Then, set the SA0 with the loop start address.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+ assert(Header->pred_size() == 2);
+
+    // The header has only two predecessors, but the non-latch predecessor
+    // is not a preheader (e.g. it has other successors).
+    // In such a case we don't need any extra PHI nodes in the new preheader;
+    // all we need is to adjust the existing PHIs in the header to refer to
+    // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector<MachineOperand,1> Tmp2;
+ SmallVector<MachineOperand,1> EmptyCond;
+
+ TB = FB = 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+      (void)NotAnalyzed; // suppress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = 0;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+  (void)LatchNotAnalyzed; // suppress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ return NewPH;
}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 5499134eb98b..8fc9ba1ee8cf 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -12,20 +12,32 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-isel"
+#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-
using namespace llvm;
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+ cl::Hidden, cl::init(2),
+ cl::desc("Maximum number of uses of a global address such that we still us a"
+ "constant extended instruction"));
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
+namespace llvm {
+ void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
//===--------------------------------------------------------------------===//
/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
/// instructions for SelectionDAG operations.
@@ -39,19 +51,24 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
// Keep a reference to HexagonTargetMachine.
HexagonTargetMachine& TM;
const HexagonInstrInfo *TII;
-
+ DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
- : SelectionDAGISel(targetmachine),
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(targetmachine, OptLevel),
Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
TM(targetmachine),
TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
-
+ initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
+ bool hasNumUsesBelowThresGA(SDNode *N) const;
SDNode *Select(SDNode *N);
// Complex Pattern Selectors.
+ inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+ inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+ bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
@@ -94,8 +111,56 @@ public:
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
+ bool isConstExtProfitable(SDNode *N) const;
+
+// XformMskToBitPosU5Imm - Returns the bit position which
+// the single bit 32 bit mask represents.
+// Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+  int32_t bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+}
+
+// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// Return true if there is exactly one bit set in V, i.e., if V is one of the
+// following integers: 2^0, 2^1, ..., 2^31.
+bool ImmIsSingleBit(uint32_t v) const {
+ uint32_t c = CountPopulation_64(v);
+ // Only return true if we counted 1 bit.
+ return c == 1;
+}
+
+// XformM5ToU5Imm - Return a target constant with the specified value, of type
+// i32 where the negative literal is transformed into a positive literal for
+// use in -= memops.
+inline SDValue XformM5ToU5Imm(signed Imm) {
+  assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+  return CurDAG->getTargetConstant(-Imm, MVT::i32);
+}
+
+
+// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+// [1..128], used in cmpb.gtu instructions.
+inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+}
- // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
#include "HexagonGenDAGISel.inc"
};
} // end anonymous namespace
@@ -104,10 +169,23 @@ public:
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
- return new HexagonDAGToDAGISel(TM);
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new HexagonDAGToDAGISel(TM, OptLevel);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+ &SelectionDAGISel::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
static bool IsS11_0_Offset(SDNode * S) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
@@ -606,8 +684,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = { Value, Base,
- CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -1507,3 +1585,79 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
OutOps.push_back(Op1);
return false;
}
+
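+// Consider constant extension profitable only when the extended value has
+// at most one use.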
+bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
+  unsigned UseCount = 0;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I)
+    UseCount++;
+
+  return (UseCount <= 1);
+}
+
+//===--------------------------------------------------------------------===//
+// Return 'true' if the global address's use count is below the threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
+ assert(N->getOpcode() == ISD::TargetGlobalAddress &&
+ "Expecting a target global address");
+
+ // Always try to fold the address.
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ return true;
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
+ GlobalAddressUseCountMap.find(GA->getGlobal());
+
+ if (GI == GlobalAddressUseCountMap.end())
+ return false;
+
+ return GI->second <= MaxNumOfUsesForConstExtenders;
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the non GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, false);
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, true);
+}
+
+//===--------------------------------------------------------------------===//
+// Fold an offset into the global address if its use count is below threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
+ bool ShouldLookForGP) {
+ if (N.getOpcode() == ISD::ADD) {
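+    // Match (add (CONST32[_GP] tglobaladdr), imm) and fold the immediate
+    // into the global address's offset.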
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
+ (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
+ GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
+
+ if (Const && GA &&
+ (GA->getOpcode() == ISD::TargetGlobalAddress)) {
+ if ((N0.getOpcode() == HexagonISD::CONST32) &&
+ !hasNumUsesBelowThresGA(GA))
+ return false;
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
+ Const->getDebugLoc(),
+ N.getValueType(),
+ GA->getOffset() +
+ (uint64_t)Const->getSExtValue());
+ return true;
+ }
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1c891f14d8fe..15858a9368ae 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -13,17 +13,10 @@
//===----------------------------------------------------------------------===//
#include "HexagonISelLowering.h"
-#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonTargetObjectFile.h"
#include "HexagonSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +25,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -103,6 +103,16 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
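+  // The promoted value falls through to the i32/f32 case below, which
+  // allocates a 4-byte stack slot for it.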
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
ofst = State.AllocateStack(4, 4);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
@@ -304,15 +314,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
@@ -321,12 +325,17 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
@@ -608,7 +617,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
// TODO: Put this function along with the other isS* functions in
// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonImmediates.td.
+// functions defined in HexagonOperands.td.
static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
@@ -1016,8 +1025,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- HexagonTargetObjectFile &TLOF =
- (HexagonTargetObjectFile&)getObjFileLowering();
+ const HexagonTargetObjectFile &TLOF =
+ static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
}
@@ -1025,6 +1034,14 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD);
+}
+
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -1053,8 +1070,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setPrefLoopAlignment(4);
// Limits for inline expansion of memcpy/memmove
- maxStoresPerMemcpy = 6;
- maxStoresPerMemmove = 6;
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
//
// Library calls for unsupported operations
@@ -1298,6 +1315,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Truncate action?
setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
@@ -1343,7 +1361,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
}
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
@@ -1353,7 +1370,12 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Increase jump tables cutover to 5, was 4.
setMinimumJumpTableEntries(5);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -1364,11 +1386,41 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+  // In V4, we have double word add/sub with carry. The problem with
+  // modelling these instructions is that they produce two results, Rdd and
+  // Px. To model the update of Px, we would have to use Defs[p0..p3], which
+  // would cause any predicate live range to spill. So, we pretend we don't
+  // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1430,6 +1482,8 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
@@ -1478,6 +1532,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index fe6c905adfcb..3279cc652434 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -16,9 +16,9 @@
#define Hexagon_ISELLOWERING_H
#include "Hexagon.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace HexagonISD {
@@ -27,6 +27,7 @@ namespace llvm {
CONST32,
CONST32_GP, // For marking data present in GP.
+ CONST32_Int_Real,
FCONST32,
SETCC,
ADJDYNALLOC,
@@ -50,6 +51,17 @@ namespace llvm {
BARRIER, // Memory barrier.
WrapperJT,
WrapperCP,
+ WrapperCombineII,
+ WrapperCombineRR,
+ WrapperCombineRI_V4,
+ WrapperCombineIR_V4,
+ WrapperPackhl,
+ WrapperSplatB,
+ WrapperSplatH,
+ WrapperShuffEB,
+ WrapperShuffEH,
+ WrapperShuffOB,
+ WrapperShuffOH,
TC_RETURN
};
}
@@ -95,6 +107,7 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
deleted file mode 100644
index 18692c4dcc5e..000000000000
--- a/lib/Target/Hexagon/HexagonImmediates.td
+++ /dev/null
@@ -1,508 +0,0 @@
-//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illnois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// From IA64's InstrInfo file
-def s32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s16Imm : Operand<i32> {
- let PrintMethod = "printImmOperand";
-}
-
-def s12Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm64 : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u64Imm : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u7Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u5Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def n8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def m6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def nOneImm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printNOneImmOperand";
-}
-
-//
-// Immediate predicates
-//
-def s32ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<32>(v);
-}]>;
-
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
-}]>;
-
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
-}]>;
-
-def s16ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<16>(v);
-}]>;
-
-
-def s13ImmPred : PatLeaf<(i32 imm), [{
- // immS13 predicate - True if the immediate fits in a 13-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<13>(v);
-}]>;
-
-
-def s12ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<12>(v);
-}]>;
-
-def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}]>;
-
-
-def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}]>;
-
-
-def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}]>;
-
-
-def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}]>;
-
-
-def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<10>(v);
-}]>;
-
-
-def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v);
-}]>;
-
-
-def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-
-def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-
-def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-
-def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<4>(v);
-}]>;
-
-
-def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field of 2.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,1>(v);
-}]>;
-
-
-def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,2>(v);
-}]>;
-
-
-def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,3>(v);
-}]>;
-
-
-def u64ImmPred : PatLeaf<(i64 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- // Adding "N ||" to suppress gcc unused warning.
- return (N || true);
-}]>;
-
-def u32ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
-}]>;
-
-def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
-}]>;
-
-def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
- // extended s8 field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,8>(v);
-}]>;
-
-def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<9>(v);
-}]>;
-
-
-def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<8>(v);
-}]>;
-
-def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<7>(v);
-}]>;
-
-
-def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 1 bit alinged - multiple of 2.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}]>;
-
-def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 2 bits alinged - multiple of 4.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}]>;
-
-def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 3 bits alinged - multiple of 8.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,3>(v);
-}]>;
-
-def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<5>(v);
-}]>;
-
-
-def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<3>(v);
-}]>;
-
-
-def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<2>(v);
-}]>;
-
-
-def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<1>(v);
-}]>;
-
-def m6ImmPred : PatLeaf<(i32 imm), [{
- // m6ImmPred predicate - True if the immediate is negative and fits in
- // a 6-bit negative number.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-//InN means negative integers in [-(2^N - 1), 0]
-def n8ImmPred : PatLeaf<(i32 imm), [{
- // n8ImmPred predicate - True if the immediate fits in a 8-bit signed
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return (-255 <= v && v <= 0);
-}]>;
-
-def nOneImmPred : PatLeaf<(i32 imm), [{
- // nOneImmPred predicate - True if the immediate is -1.
- int64_t v = (int64_t)N->getSExtValue();
- return (-1 == v);
-}]>;
-
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index a64c7a18164f..587fa7d7f10e 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -13,28 +13,77 @@
// *** Must match HexagonBaseInfo.h ***
//===----------------------------------------------------------------------===//
-class Type<bits<5> t> {
+class IType<bits<5> t> {
bits<5> Value = t;
}
-def TypePSEUDO : Type<0>;
-def TypeALU32 : Type<1>;
-def TypeCR : Type<2>;
-def TypeJR : Type<3>;
-def TypeJ : Type<4>;
-def TypeLD : Type<5>;
-def TypeST : Type<6>;
-def TypeSYSTEM : Type<7>;
-def TypeXTYPE : Type<8>;
-def TypeMARKER : Type<31>;
+def TypePSEUDO : IType<0>;
+def TypeALU32 : IType<1>;
+def TypeCR : IType<2>;
+def TypeJR : IType<3>;
+def TypeJ : IType<4>;
+def TypeLD : IType<5>;
+def TypeST : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE : IType<8>;
+def TypeENDLOOP: IType<31>;
+
+// Maintain list of valid subtargets for each instruction.
+class SubTarget<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def HasV2SubT : SubTarget<0xf>;
+def HasV2SubTOnly : SubTarget<0x1>;
+def NoV2SubT : SubTarget<0x0>;
+def HasV3SubT : SubTarget<0xe>;
+def HasV3SubTOnly : SubTarget<0x2>;
+def NoV3SubT : SubTarget<0x1>;
+def HasV4SubT : SubTarget<0xc>;
+def NoV4SubT : SubTarget<0x3>;
+def HasV5SubT : SubTarget<0x8>;
+def NoV5SubT : SubTarget<0x7>;
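+// Each bit of the value selects one architecture version, with v2 in the
+// least significant bit; e.g. HasV4SubT (0xc) marks an instruction as valid
+// on v4 and v5.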
+
+// Addressing modes for load/store instructions
+class AddrModeType<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoAddrMode : AddrModeType<0>; // No addressing mode
+def Absolute : AddrModeType<1>; // Absolute addressing mode
+def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode
+def BaseImmOffset : AddrModeType<3>; // Indirect with offset
+def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
+def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+
+class MemAccessSize<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoMemAccess : MemAccessSize<0>; // Not a memory access instruction.
+def ByteAccess : MemAccessSize<1>; // Byte access instruction (memb).
+def HalfWordAccess : MemAccessSize<2>; // Half word access instruction (memh).
+def WordAccess : MemAccessSize<3>; // Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>; // Double word access instruction (memd).
+
//===----------------------------------------------------------------------===//
// Instruction Class Declaration +
//===----------------------------------------------------------------------===//
-class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr, InstrItinClass itin, Type type> : Instruction {
- field bits<32> Inst;
+class OpcodeHexagon {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<2> IParse = 0; // Parse bits.
+
+ let Inst{31-28} = IClass;
+ let Inst{15-14} = IParse;
+
+ bits<1> zero = 0;
+}
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, IType type>
+ : Instruction, OpcodeHexagon {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -45,20 +94,63 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let Itinerary = itin;
let Size = 4;
- // *** Must match HexagonBaseInfo.h ***
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
// Instruction type according to the ISA.
- Type HexagonType = type;
- let TSFlags{4-0} = HexagonType.Value;
+ IType Type = type;
+ let TSFlags{4-0} = Type.Value;
+
// Solo instructions, i.e., those that cannot be in a packet with others.
- bits<1> isHexagonSolo = 0;
- let TSFlags{5} = isHexagonSolo;
+ bits<1> isSolo = 0;
+ let TSFlags{5} = isSolo;
+
// Predicated instructions.
bits<1> isPredicated = 0;
let TSFlags{6} = isPredicated;
-
- // Dot new value store instructions.
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<2> opNewBits = 0;
+ let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
+ bits<1> isNVStorable = 0;
+ let TSFlags{16} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{8} = isNVStore;
+ let TSFlags{17} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{18} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{19} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{22-20} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{23} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+  let TSFlags{28-24} = opExtentBits; // Number of bits of range before extending.
+
+  // If an instruction is valid on a subtarget (v2-v5), set the corresponding
+  // bit in validSubTargets; v2 is the least significant bit.
+  // By default, an instruction is valid on all subtargets.
+ SubTarget validSubTargets = HasV2SubT;
+ let TSFlags{32-29} = validSubTargets.Value;
+
+ // Addressing mode for load/store instructions.
+ AddrModeType addrMode = NoAddrMode;
+ let TSFlags{35-33} = addrMode.Value;
+
+ // Memory access size for mem access instructions (load/store)
+ MemAccessSize accessSize = NoMemAccess;
+ let TSFlags{38-36} = accessSize.Value;
// Fields used for relation models.
string BaseOpcode = "";
@@ -66,7 +158,15 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string PredSense = "";
string PNewValue = "";
string InputType = ""; // Input is "imm" or "reg" type.
- // *** The code above must match HexagonBaseInfo.h ***
+ string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
+ string isFloat = "false"; // Set to "true" for the floating-point load/store.
+ string isBrTaken = ""; // Set to "true"/"false" for jump instructions
+
+ let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+ "");
+ let PNewValue = !if(isPredicatedNew, "new", "");
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
//===----------------------------------------------------------------------===//
@@ -75,187 +175,143 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
-class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayLoad = 1;
-}
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
-class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayStore = 1;
-}
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-// SYSTEM Instruction Class in V4 can take SLOT0 only
-// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
-class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only.
+// In V2/V3 we used ST for this, but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
-class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MInst<outs, ins, asmstr, pattern, cstr>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : SInst<outs, ins, asmstr, pattern, cstr>;
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
- bits<16> imm16;
-}
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
- bits<5> rs;
- bits<5> pu; // Predicate register
-}
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
- bits<5> rs;
- bits<10> imm10;
-}
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
-class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
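+
+// Illustrative use of the simplified classes above (hypothetical def, not
+// part of this patch); the pattern list and constraint string may now be
+// left at their defaults:
+//   def MyAdd : ALU32Inst<(outs IntRegs:$dst),
+//                         (ins IntRegs:$src1, IntRegs:$src2),
+//                         "$dst = add($src1, $src2)">;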
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
@@ -265,75 +321,52 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
//
// ALU32 patterns
//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
-}
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
//
// ALU64 patterns.
//
-class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
-}
-
-class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
-
-// J Type Instructions.
-class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JType<outs, ins, asmstr, pattern> {
-}
-
-// JR type Instructions.
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JRType<outs, ins, asmstr, pattern> {
-}
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayStore = 1;
-}
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
// Post increment LD Instruction.
-class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
-
-class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayLoad = 1;
-}
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 49741a3d1b20..9fda0da91612 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,9 +17,9 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : Type<9>;
-def TypeNV : Type<10>;
-def TypePREFIX : Type<30>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypePREFIX : IType<30>;
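+
+// These IType values mirror the HexagonII::Type enumeration in
+// MCTargetDesc/HexagonBaseInfo.h (a sketch, assuming the usual enum form):
+//   TypeMEMOP = 9, TypeNV = 10, TypePREFIX = 30
+// which is what the "must match" note above refers to.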
//----------------------------------------------------------------------------//
// Instruction Classes Definitions +
@@ -28,40 +28,39 @@ def TypePREFIX : Type<30>;
//
// NV type instructions.
//
-class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>;
+
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Definition of Post increment new value store.
-class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+let mayStore = 1 in
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// New-value conditional branch.
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
-class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
- bits<5> rd;
- bits<5> rs;
- bits<6> imm6;
-}
+let mayLoad = 1, mayStore = 1 in
+class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>;
-class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
- let isCodeGenOnly = 1;
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MEMInst<outs, ins, asmstr, pattern, cstr>;
- bits<26> imm26;
-}
+let isCodeGenOnly = 1 in
+class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>;
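+
+// A constant extender (immext) occupies its own slot in a packet and
+// supplies the upper bits of the immediate of the instruction that follows
+// it, which is why EXTENDERInst is marked codegen-only and carries no
+// constraint string.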
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 84354403084d..60b12ac01c9c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
+#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
-#include "Hexagon.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/MathExtras.h"
#define GET_INSTRINFO_CTOR
@@ -305,6 +305,88 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value
+/// it compares against in \p Value. \p Mask receives the width mask applied
+/// to both sides of the comparison (~0, 0xFF, or 0xFFFF). Return true if
+/// the comparison instruction can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUri_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhGTUri_V4:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
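+// A minimal caller sketch (hypothetical, not part of this patch) showing
+// how a pass would consume analyzeCompare() as defined above:
+//   unsigned SrcReg = 0, SrcReg2 = 0;
+//   int Mask = 0, Value = 0;
+//   if (TII->analyzeCompare(MI, SrcReg, SrcReg2, Mask, Value)) {
+//     // SrcReg2 != 0: MI compares the registers SrcReg and SrcReg2.
+//     // SrcReg2 == 0: MI compares SrcReg against the immediate Value,
+//     //               with both sides masked by Mask (0xFF / 0xFFFF for
+//     //               the byte / halfword variants).
+//   }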
+
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -314,7 +396,7 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg);
return;
}
if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
@@ -344,6 +426,18 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
return;
}
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ Hexagon::PredRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
llvm_unreachable("Unimplemented");
}
@@ -443,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return(0);
}
+MachineInstr*
+HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+  // DBG_VALUE operands: <location>, <offset>, <variable metadata>.
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE))
+    .addFrameIndex(FrameIx).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
@@ -463,270 +566,43 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- case Hexagon::JMP_EQriPnt_nv_V4:
- case Hexagon::JMP_EQriNotPt_nv_V4:
- case Hexagon::JMP_EQriNotPnt_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- case Hexagon::JMP_EQriPntneg_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- case Hexagon::JMP_EQrrPnt_nv_V4:
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- case Hexagon::JMP_GTriPnt_nv_V4:
- case Hexagon::JMP_GTriNotPt_nv_V4:
- case Hexagon::JMP_GTriNotPnt_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- case Hexagon::JMP_GTriPntneg_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- case Hexagon::JMP_GTrrPnt_nv_V4:
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- case Hexagon::JMP_GTUriPnt_nv_V4:
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
- // TFR_FI
+  // TODO: This switch is largely obsolete now and will be removed
+  // in subsequent patches.
+  switch(MI->getOpcode()) {
+    // TFR_FI remains a special case.
case Hexagon::TFR_FI:
return true;
+ default:
+ return false;
}
+ return false;
}
+// This returns true in two cases:
+// - The opcode itself indicates that this is an extended instruction.
+// - One of MI's operands has the HMOTF_ConstExtended flag set.
bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
-
- // V4 absolute set addressing.
- case Hexagon::LDrid_abs_setimm_V4:
- case Hexagon::LDriw_abs_setimm_V4:
- case Hexagon::LDrih_abs_setimm_V4:
- case Hexagon::LDrib_abs_setimm_V4:
- case Hexagon::LDriuh_abs_setimm_V4:
- case Hexagon::LDriub_abs_setimm_V4:
-
- case Hexagon::STrid_abs_setimm_V4:
- case Hexagon::STrib_abs_setimm_V4:
- case Hexagon::STrih_abs_setimm_V4:
- case Hexagon::STriw_abs_setimm_V4:
-
- // V4 global address load.
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
- case Hexagon::LDd_GP_cPt_V4 :
- case Hexagon::LDd_GP_cNotPt_V4 :
- case Hexagon::LDd_GP_cdnPt_V4 :
- case Hexagon::LDd_GP_cdnNotPt_V4 :
- case Hexagon::LDb_GP_cPt_V4 :
- case Hexagon::LDb_GP_cNotPt_V4 :
- case Hexagon::LDb_GP_cdnPt_V4 :
- case Hexagon::LDb_GP_cdnNotPt_V4 :
- case Hexagon::LDub_GP_cPt_V4 :
- case Hexagon::LDub_GP_cNotPt_V4 :
- case Hexagon::LDub_GP_cdnPt_V4 :
- case Hexagon::LDub_GP_cdnNotPt_V4 :
- case Hexagon::LDh_GP_cPt_V4 :
- case Hexagon::LDh_GP_cNotPt_V4 :
- case Hexagon::LDh_GP_cdnPt_V4 :
- case Hexagon::LDh_GP_cdnNotPt_V4 :
- case Hexagon::LDuh_GP_cPt_V4 :
- case Hexagon::LDuh_GP_cNotPt_V4 :
- case Hexagon::LDuh_GP_cdnPt_V4 :
- case Hexagon::LDuh_GP_cdnNotPt_V4 :
- case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::LDw_GP_cdnNotPt_V4 :
-
- // V4 global address store.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
- case Hexagon::STd_GP_cPt_V4 :
- case Hexagon::STd_GP_cNotPt_V4 :
- case Hexagon::STd_GP_cdnPt_V4 :
- case Hexagon::STd_GP_cdnNotPt_V4 :
- case Hexagon::STb_GP_cPt_V4 :
- case Hexagon::STb_GP_cNotPt_V4 :
- case Hexagon::STb_GP_cdnPt_V4 :
- case Hexagon::STb_GP_cdnNotPt_V4 :
- case Hexagon::STh_GP_cPt_V4 :
- case Hexagon::STh_GP_cNotPt_V4 :
- case Hexagon::STh_GP_cdnPt_V4 :
- case Hexagon::STh_GP_cdnNotPt_V4 :
- case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::STw_GP_cdnPt_V4 :
- case Hexagon::STw_GP_cdnNotPt_V4 :
-
- // V4 predicated global address new value store.
- case Hexagon::STrib_GP_cPt_nv_V4 :
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- case Hexagon::STrib_GP_cdnPt_nv_V4 :
- case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
- case Hexagon::STrih_GP_cPt_nv_V4 :
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- case Hexagon::STrih_GP_cdnPt_nv_V4 :
- case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
- case Hexagon::STriw_GP_cPt_nv_V4 :
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- case Hexagon::STriw_GP_cdnPt_nv_V4 :
- case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
- case Hexagon::STb_GP_cPt_nv_V4 :
- case Hexagon::STb_GP_cNotPt_nv_V4 :
- case Hexagon::STb_GP_cdnPt_nv_V4 :
- case Hexagon::STb_GP_cdnNotPt_nv_V4 :
- case Hexagon::STh_GP_cPt_nv_V4 :
- case Hexagon::STh_GP_cNotPt_nv_V4 :
- case Hexagon::STh_GP_cdnPt_nv_V4 :
- case Hexagon::STh_GP_cdnNotPt_nv_V4 :
- case Hexagon::STw_GP_cPt_nv_V4 :
- case Hexagon::STw_GP_cNotPt_nv_V4 :
- case Hexagon::STw_GP_cdnPt_nv_V4 :
- case Hexagon::STw_GP_cdnNotPt_nv_V4 :
-
- // TFR_FI
- case Hexagon::TFR_FI_immext_V4:
-
- // TFRI_F
- case Hexagon::TFRI_f:
- case Hexagon::TFRI_cPt_f:
- case Hexagon::TFRI_cNotPt_f:
- case Hexagon::CONST64_Float_Real:
+  // First check if this is a permanently extended opcode.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+  // Use MO operand flags to determine if one of MI's operands
+  // has the HMOTF_ConstExtended flag set.
+  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+       E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
}
+ return false;
}
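+
+// For illustration (assumed marking site, not introduced by this patch):
+// the operand flag tested above would typically be set when an operand is
+// selected for constant extension, e.g.
+//   MO.setTargetFlags(MO.getTargetFlags() | HexagonII::HMOTF_ConstExtended);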
bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
@@ -835,264 +711,6 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
}
}
-unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- return Hexagon::JMP_EQriPt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPt_nv_V4:
- return Hexagon::JMP_EQriNotPt_ie_nv_V4;
- case Hexagon::JMP_EQriPnt_nv_V4:
- return Hexagon::JMP_EQriPnt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPnt_nv_V4:
- return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- return Hexagon::JMP_EQriPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriPntneg_nv_V4:
- return Hexagon::JMP_EQriPntneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- return Hexagon::JMP_EQrrPt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
- case Hexagon::JMP_EQrrPnt_nv_V4:
- return Hexagon::JMP_EQrrPnt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- return Hexagon::JMP_GTriPt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPt_nv_V4:
- return Hexagon::JMP_GTriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTriPnt_nv_V4:
- return Hexagon::JMP_GTriPnt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPnt_nv_V4:
- return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- return Hexagon::JMP_GTriPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriPntneg_nv_V4:
- return Hexagon::JMP_GTriPntneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- return Hexagon::JMP_GTrrPt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrPnt_nv_V4:
- return Hexagon::JMP_GTrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- return Hexagon::JMP_GTUriPt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUriPnt_nv_V4:
- return Hexagon::JMP_GTUriPnt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- return Hexagon::JMP_GTUrrPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
-
- case Hexagon::TFR_FI:
- return Hexagon::TFR_FI_immext_V4;
-
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- llvm_unreachable("Needs implementing.");
- }
-}
-
-unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of jump instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- return Hexagon::JMP_EQriPt_nv_V4;
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPt_nv_V4;
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- return Hexagon::JMP_EQriPnt_nv_V4;
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPnt_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriPtneg_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_nv_V4;
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriPntneg_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- return Hexagon::JMP_EQrrPt_nv_V4;
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPt_nv_V4;
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrPnt_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- return Hexagon::JMP_GTriPt_nv_V4;
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPt_nv_V4;
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- return Hexagon::JMP_GTriPnt_nv_V4;
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPnt_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriPtneg_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_nv_V4;
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriPntneg_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- return Hexagon::JMP_GTrrPt_nv_V4;
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPt_nv_V4;
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrPnt_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- return Hexagon::JMP_GTUriPt_nv_V4;
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPt_nv_V4;
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriPnt_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPt_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_nv_V4;
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPnt_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
- }
-}
-
-
bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
@@ -1101,7 +719,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrib_indexed_nv_V4:
case Hexagon::STrib_indexed_shl_nv_V4:
case Hexagon::STrib_shl_nv_V4:
- case Hexagon::STrib_GP_nv_V4:
case Hexagon::STb_GP_nv_V4:
case Hexagon::POST_STbri_nv_V4:
case Hexagon::STrib_cPt_nv_V4:
@@ -1124,10 +741,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STb_GP_cNotPt_nv_V4:
case Hexagon::STb_GP_cdnPt_nv_V4:
case Hexagon::STb_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
case Hexagon::STrib_abs_nv_V4:
case Hexagon::STrib_abs_cPt_nv_V4:
case Hexagon::STrib_abs_cdnPt_nv_V4:
@@ -1144,7 +757,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrih_indexed_nv_V4:
case Hexagon::STrih_indexed_shl_nv_V4:
case Hexagon::STrih_shl_nv_V4:
- case Hexagon::STrih_GP_nv_V4:
case Hexagon::STh_GP_nv_V4:
case Hexagon::POST_SThri_nv_V4:
case Hexagon::STrih_cPt_nv_V4:
@@ -1167,10 +779,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STh_GP_cNotPt_nv_V4:
case Hexagon::STh_GP_cdnPt_nv_V4:
case Hexagon::STh_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
case Hexagon::STrih_abs_nv_V4:
case Hexagon::STrih_abs_cPt_nv_V4:
case Hexagon::STrih_abs_cdnPt_nv_V4:
@@ -1187,7 +795,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STriw_indexed_nv_V4:
case Hexagon::STriw_indexed_shl_nv_V4:
case Hexagon::STriw_shl_nv_V4:
- case Hexagon::STriw_GP_nv_V4:
case Hexagon::STw_GP_nv_V4:
case Hexagon::POST_STwri_nv_V4:
case Hexagon::STriw_cPt_nv_V4:
@@ -1210,10 +817,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STw_GP_cNotPt_nv_V4:
case Hexagon::STw_GP_cdnPt_nv_V4:
case Hexagon::STw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
case Hexagon::STriw_abs_nv_V4:
case Hexagon::STriw_abs_cPt_nv_V4:
case Hexagon::STriw_abs_cdnPt_nv_V4:
@@ -1305,6 +908,16 @@ bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
}
}
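+
+// On Hexagon, a "new-value" instruction consumes a register value in the
+// same packet in which it is produced, e.g. a store that forwards the
+// result of an add packetized alongside it.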
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ if (isNewValueJump(MI))
+ return true;
+
+ if (isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -1506,26 +1119,11 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::JMPR_cPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_cPt_V4:
- return Hexagon::LDrid_indexed_cNotPt_V4;
- case Hexagon::LDrid_indexed_cNotPt_V4:
- return Hexagon::LDrid_indexed_cPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4:
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4:
return Hexagon::LDrid_indexed_shl_cPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4:
- return Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDrib_indexed_cNotPt_V4:
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cPt_V4:
- return Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_cNotPt_V4:
- return Hexagon::LDriub_indexed_cPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4:
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4:
@@ -1536,16 +1134,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriub_indexed_shl_cNotPt_V4:
return Hexagon::LDriub_indexed_shl_cPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4:
- return Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDrih_indexed_cNotPt_V4:
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cPt_V4:
- return Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cNotPt_V4:
- return Hexagon::LDriuh_indexed_cPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4:
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4:
@@ -1556,11 +1144,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
return Hexagon::LDriuh_indexed_shl_cPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4:
- return Hexagon::LDriw_indexed_cNotPt_V4;
- case Hexagon::LDriw_indexed_cNotPt_V4:
- return Hexagon::LDriw_indexed_cPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4:
return Hexagon::LDriw_indexed_shl_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_cNotPt_V4:
@@ -1686,26 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::STw_GP_cNotPt_V4:
return Hexagon::STw_GP_cPt_V4;
- case Hexagon::STrid_GP_cPt_V4:
- return Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cNotPt_V4:
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
// Load.
case Hexagon::LDrid_cPt:
return Hexagon::LDrid_cNotPt;
@@ -1971,75 +1534,26 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::JMPR_cNotPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_V4:
- return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
- Hexagon::LDrid_indexed_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_V4:
- return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
- Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDriub_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
- Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_V4:
- return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
- Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDriuh_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
- Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_V4:
- return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
- Hexagon::LDriw_indexed_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
Hexagon::LDriw_indexed_shl_cNotPt_V4;
// V4 Load from global address
- case Hexagon::LDrid_GP_V4:
- return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 :
- Hexagon::LDrid_GP_cNotPt_V4;
- case Hexagon::LDrib_GP_V4:
- return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 :
- Hexagon::LDrib_GP_cNotPt_V4;
- case Hexagon::LDriub_GP_V4:
- return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 :
- Hexagon::LDriub_GP_cNotPt_V4;
- case Hexagon::LDrih_GP_V4:
- return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 :
- Hexagon::LDrih_GP_cNotPt_V4;
- case Hexagon::LDriuh_GP_V4:
- return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 :
- Hexagon::LDriuh_GP_cNotPt_V4;
- case Hexagon::LDriw_GP_V4:
- return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 :
- Hexagon::LDriw_GP_cNotPt_V4;
-
case Hexagon::LDd_GP_V4:
return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 :
Hexagon::LDd_GP_cNotPt_V4;
@@ -2122,19 +1636,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::STrid_indexed_shl_cNotPt_V4;
// V4 Store to global address
- case Hexagon::STrid_GP_V4:
- return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 :
- Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrib_GP_V4:
- return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 :
- Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrih_GP_V4:
- return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 :
- Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STriw_GP_V4:
- return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 :
- Hexagon::STriw_GP_cNotPt_V4;
-
case Hexagon::STd_GP_V4:
return !invertPredicate ? Hexagon::STd_GP_cPt_V4 :
Hexagon::STd_GP_cNotPt_V4;
@@ -2221,38 +1722,141 @@ PredicateInstruction(MachineInstr *MI,
assert (isPredicable(MI) && "Expected predicable instruction");
bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
(Cond[0].getImm() == 0));
+
+  // This will change MI's opcode to its predicated version.
+  // However, its operand list is still the old one, i.e. the
+  // non-predicated one.
MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
- //
- // This assumes that the predicate is always the first operand
- // in the set of inputs.
- //
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- int oper;
- for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
- MachineOperand MO = MI->getOperand(oper);
- if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
- break;
- }
- if (MO.isReg()) {
- MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
- MO.isImplicit(), MO.isKill(),
- MO.isDead(), MO.isUndef(),
- MO.isDebug());
- } else if (MO.isImm()) {
- MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
- } else {
- llvm_unreachable("Unexpected operand type");
+ int oper = -1;
+ unsigned int GAIdx = 0;
+
+ // Indicates whether the current MI has a GlobalAddress operand
+ bool hasGAOpnd = false;
+ std::vector<MachineOperand> tmpOpnds;
+
+  // Indicates whether we need to shift operands to the right.
+  bool needShift = true;
+
+  // The predicate is always the first input operand.
+ if (MI->getNumOperands() == 0) {
+    // The non-predicated version of MI does not take any operands,
+ // i.e. no outs and no ins. In this condition, the predicate
+ // operand will be directly placed at Operands[0]. No operand
+ // shift is needed.
+ // Example: BARRIER
+ needShift = false;
+ oper = -1;
+ }
+ else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
+ && MI->getOperand(MI->getNumOperands()-1).isDef()
+ && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
+    // The non-predicated version of MI does not have any input operands.
+ // In this condition, we extend the length of Operands[] by one and
+ // copy the original last operand to the newly allocated slot.
+    // At this moment it is just a placeholder; later, we will put the
+    // predicate operand directly into it. No operand shift is needed.
+    // Example: r0 = BARRIER (a made-up instruction, used here only for
+    // illustration)
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ needShift = false;
+ oper = MI->getNumOperands() - 2;
+ }
+ else {
+ // We need to right shift all input operands by one. Duplicate the
+ // last operand into the newly allocated slot.
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ }
+
+ if (needShift)
+ {
+ // Operands[ MI->getNumOperands() - 2 ] has been copied into
+ // Operands[ MI->getNumOperands() - 1 ], so we start from
+ // Operands[ MI->getNumOperands() - 3 ].
+ // oper is a signed int.
+ // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
+ {
+ MachineOperand &MO = MI->getOperand(oper);
+
+      // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4]   Opnd[5]   Opnd[6]   Opnd[7]
+      // <Def0>  <Def1>  <Use0>  <Use1>  <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
+      //                 ^
+      //                 |
+      //                 the predicate operand is inserted here, right
+      //                 after the last explicit def
+ if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+ break;
+ }
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ }
+ else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ }
+ else if (MO.isGlobal()) {
+          // MI cannot have more than one GlobalAddress operand.
+          assert(!hasGAOpnd && "MI can only have one GlobalAddress opnd");
+
+          // The MachineOperand class has no "ChangeToGlobalAddress" member
+          // function (unlike "ChangeToRegister" and "ChangeToImmediate"), so
+          // we have to remove the GlobalAddress operand and everything after
+          // it from the Operands[] list first, and add them back after the
+          // predicate operand has been inserted. tmpOpnds[] remembers these
+          // operands before we remove them.
+ tmpOpnds.push_back(MO);
+
+ // Operands[oper] is a GlobalAddress operand;
+ // Operands[oper+1] has been copied into Operands[oper+2];
+ hasGAOpnd = true;
+ GAIdx = oper;
+ continue;
+ }
+        else {
+          llvm_unreachable("Unexpected operand type");
+        }
}
}
int regPos = invertJump ? 1 : 0;
MachineOperand PredMO = Cond[regPos];
+
+  // [oper] now points to the last explicit def. The predicate operand must
+  // be placed at [oper+1]; see the diagram above.
+  // This assumes that the predicate is always the first operand in the set
+  // of inputs, i.e. Operands[0+numResults]. It would be better to assert
+  // this here, but the assert cannot be written in terms of
+  // findFirstPredOperandIdx(), since it would still return -1 at this point.
+  if (oper < -1) oper = -1;
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
PredMO.isImplicit(), PredMO.isKill(),
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
+ if (hasGAOpnd)
+ {
+ unsigned int i;
+
+ // Operands[GAIdx] is the original GlobalAddress operand, which is
+ // already copied into tmpOpnds[0].
+ // Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
+ // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
+ // so we start from [GAIdx+2]
+ for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
+ tmpOpnds.push_back(MI->getOperand(i));
+
+ // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
+ // It is very important that we always remove from the end of Operands[]
+ // MI->getNumOperands() is at least 2 if program goes to here.
+ for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
+ MI->RemoveOperand(i);
+
+ for (i = 0; i < tmpOpnds.size(); ++i)
+ MI->addOperand(tmpOpnds[i]);
+ }
+
return true;
}
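+
+// Worked example (hypothetical operand lists, for illustration only):
+// predicating "r2 = add(r3, #5)" on p0 rewrites the operand list as
+//   before: { r2(def), r3(use), #5 }
+//   after : { r2(def), p0(use), r3(use), #5 }
+// i.e. every input shifts right one slot and the predicate register lands
+// immediately after the last explicit def.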
@@ -2286,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
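+
+// A predicated-new (".new") instruction is guarded by a predicate that is
+// produced earlier in the same packet, e.g. "if (p0.new) r0 = add(r1, r2)".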
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ assert(isPredicated(MI));
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
bool
HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
@@ -2354,29 +1965,34 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+  // We used to assert if the offset was not properly aligned; however,
+  // there are cases where a misaligned pointer recast can cause this
+  // problem, and we need to allow for it. The front end warns about such
+  // misalignments with respect to the load size.
switch(Opcode) {
case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
case Hexagon::LDriw_f:
+ case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
case Hexagon::LDrid_f:
case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -2391,54 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
return (0 <= Offset && Offset <= 255);
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
return (0 <= Offset && Offset <= 127);
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
return (0 <= Offset && Offset <= 63);
// LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -2447,6 +2037,9 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
+ case Hexagon::LOOP0_i:
+ return isUInt<10>(Offset);
+
// INLINEASM is very special.
case Hexagon::INLINEASM:
return true;
@@ -2491,50 +2084,33 @@ isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- return true;
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
+ case Hexagon::MemOPb_SETBITi_V4:
+ case Hexagon::MemOPh_SETBITi_V4:
+ case Hexagon::MemOPw_SETBITi_V4:
+ case Hexagon::MemOPb_CLRBITi_V4:
+ case Hexagon::MemOPh_CLRBITi_V4:
+ case Hexagon::MemOPw_CLRBITi_V4:
+ return true;
}
+ return false;
}
@@ -2661,28 +2237,16 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cNotPt :
return QRI.Subtarget.hasV4TOps();
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
return QRI.Subtarget.hasV4TOps();
@@ -2765,14 +2329,6 @@ isConditionalStore (const MachineInstr* MI) const {
return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
@@ -2813,7 +2369,33 @@ isConditionalStore (const MachineInstr* MI) const {
}
}
+// Returns true if the instruction is a dot-new instruction, i.e. either a
+// new-value instruction or a predicated instruction in predicate-new form.
+bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
+ return (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+}
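getAddrMode, like the other TSFlags accessors added below, reads a packed bit field with the usual shift-and-mask idiom; the real positions and masks are defined in HexagonII (MCTargetDesc/HexagonBaseInfo.h). A generic sketch of the idiom, with placeholder field values:

    #include <cstdint>

    // Placeholder position/mask standing in for the HexagonII constants.
    enum : uint64_t { FieldPos = 3, FieldMask = 0x7 };

    // Extract a packed attribute from an instruction's TSFlags word.
    static unsigned extractTSField(uint64_t Flags) {
      return static_cast<unsigned>((Flags >> FieldPos) & FieldMask);
    }
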
+
+/// immediateExtend - Changes the instruction in place to one using an
+/// immediate extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+ assert((isExtendable(MI)||isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
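A minimal usage sketch, assuming the usual pass context (TII pointing at this HexagonInstrInfo, MI a MachineInstr* under inspection):

    // Force a constant extender on an extendable instruction whose
    // operand has not yet been marked.
    if (TII->isExtendable(MI) && !TII->isConstExtended(MI))
      TII->immediateExtend(MI); // sets HMOTF_ConstExtended on the operand
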
DFAPacketizer *HexagonInstrInfo::
CreateTargetScheduleState(const TargetMachine *TM,
@@ -2840,3 +2422,155 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
+
+bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
+
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction is always constant-extended.
+ return true;
+
+ unsigned isExtendable = (F >> HexagonII::ExtendablePos)
+ & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Check whether the operand has already been flagged for constant
+ // extension.
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a Machine BB address we are talking about, and it is
+ // not marked as extended, say so.
+ if (MO.isMBB())
+ return false;
+
+ // An instruction with an extendable immediate may have a global address
+ // or another symbol shoehorned into it (COMBINE is one such case); those
+ // operands are always constant-extended.
+ if (MO.isGlobal() || MO.isSymbol())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Returns true if OperandNum is the instruction's extendable operand.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const {
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Returns the operand index of the extendable (constant-extendable) operand.
+unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
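Worked example: with a signed extent and bits = 8 this pair of functions yields [-128, 127]; an unsigned 8-bit extent yields [0, 255]. The same arithmetic as a self-contained sketch:

    #include <cassert>

    // Range of an n-bit immediate extent, mirroring getMinValue/getMaxValue.
    static int extentMin(bool IsSigned, unsigned Bits) {
      return IsSigned ? -(1 << (Bits - 1)) : 0;
    }
    static int extentMax(bool IsSigned, unsigned Bits) {
      return IsSigned ? (1 << (Bits - 1)) - 1 : (1 << Bits) - 1;
    }

    int main() {
      assert(extentMin(true, 8) == -128 && extentMax(true, 8) == 127);
      assert(extentMin(false, 8) == 0 && extentMax(false, 8) == 255);
      return 0;
    }
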
+
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::NonExtEquivalentExists(const MachineInstr *MI) const {
+
+ short NonExtOpcode;
+ // If the instruction has a register form that uses a register in place of
+ // the extended operand, a non-extended equivalent exists.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check the addressing mode and retrieve the non-extended equivalent.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+ // base+register-offset addressing mode, with the left-shift amount set
+ // to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ return NonExtOpcode >= 0;
+ }
+ return false;
+}
+
+// Returns the opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+
+ // If the instruction has a register form that uses a register in place of
+ // the extended operand, return that as the non-extended form.
+ short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ if (NonExtOpcode >= 0)
+ return NonExtOpcode;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check the addressing mode and retrieve the non-extended equivalent.
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ case HexagonII::BaseImmOffset :
+ return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
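A hedged sketch of how a caller might combine the queries above (TII and MI are assumed from surrounding pass code; operand fix-ups are elided):

    // Replace a constant-extended memory op with its non-extended twin.
    if (TII->isConstExtended(MI) && TII->NonExtEquivalentExists(MI)) {
      short NewOpc = TII->getNonExtOpcode(MI);
      assert(NewOpc >= 0 && "NonExtEquivalentExists promised an opcode");
      MI->setDesc(TII->get(NewOpc));
      // The base+register form still needs a zero shift-amount operand;
      // this sketch only swaps the opcode.
    }
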
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2bb53f899ce1..5df13a88b5d3 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,8 +16,8 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -66,6 +66,10 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const;
+
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -123,6 +127,7 @@ public:
const BranchProbability &Probability) const;
virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool isPredicatedNew(const MachineInstr *MI) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
virtual bool
@@ -136,6 +141,11 @@ public:
isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
const BranchProbability &Probability) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
virtual DFAPacketizer*
CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
@@ -165,6 +175,8 @@ public:
bool isConditionalALU32 (const MachineInstr* MI) const;
bool isConditionalLoad (const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
bool isExtendable(const MachineInstr* MI) const;
@@ -173,9 +185,18 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueJump(const MachineInstr* MI) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
- unsigned getImmExtForm(const MachineInstr* MI) const;
- unsigned getNormalBranchForm(const MachineInstr* MI) const;
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool isConstExtended(MachineInstr *MI) const;
+ unsigned getAddrMode(const MachineInstr* MI) const;
+ bool isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const;
+ unsigned short getCExtOpNum(const MachineInstr *MI) const;
+ int getMinValue(const MachineInstr *MI) const;
+ int getMaxValue(const MachineInstr *MI) const;
+ bool NonExtEquivalentExists(const MachineInstr *MI) const;
+ short getNonExtOpcode(const MachineInstr *MI) const;
private:
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 1d4a7060adf0..74dc0ca72a04 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -12,88 +12,7 @@
//===----------------------------------------------------------------------===//
include "HexagonInstrFormats.td"
-include "HexagonImmediates.td"
-
-//===----------------------------------------------------------------------===//
-// Classes used for relation maps.
-//===----------------------------------------------------------------------===//
-// PredRel - Filter class used to relate non-predicated instructions with their
-// predicated forms.
-class PredRel;
-// PredNewRel - Filter class used to relate predicated instructions with their
-// predicate-new forms.
-class PredNewRel: PredRel;
-// ImmRegRel - Filter class used to relate instructions having reg-reg form
-// with their reg-imm counterparts.
-class ImmRegRel;
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Predicate Definitions.
-//===----------------------------------------------------------------------===//
-def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
-def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
-def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
-def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
-def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
-def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
-def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
-
-// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
-
-// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-// Address operands
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-let PrintMethod = "printGlobalOperand" in
- def globaladdress : Operand<i32>;
-
-let PrintMethod = "printJumpTable" in
- def jumptablebase : Operand<i32>;
-
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
-
-def bblabel : Operand<i32>;
-def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
-
-def symbolHi32 : Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-def symbolLo32 : Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
+include "HexagonOperands.td"
// Multi-class for logical operators.
multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
@@ -122,40 +41,54 @@ multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
(OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
}
-multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), s10ImmPred:$c))]>;
+multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>;
+ }
}
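The isExtendable/opExtendable/isExtentSigned/opExtentBits lets attached here populate the TSFlags fields consumed by getCExtOpNum, getMinValue and getMaxValue in HexagonInstrInfo.cpp. Concretely, the s10Ext compare immediate fits without a constant extender only in [-512, 511]; a one-line restatement of that check:

    // An s10 immediate needs no constant extender iff it is in range.
    static bool fitsS10(int Imm) { return Imm >= -512 && Imm <= 511; }
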
-multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), u9ImmPred:$c))]>;
+multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 0,
+ opExtentBits = 9, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>;
+ }
}
multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
[(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- u8ImmPred:$c))]>;
+ u8ExtPred:$c))]>;
}
multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
[(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- s8ImmPred:$c))]>;
+ s8ExtPred:$c))]>;
}
}
@@ -165,8 +98,8 @@ multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
multiclass ALU32_Pbase<string mnemonic, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, $src3)",
@@ -174,10 +107,10 @@ multiclass ALU32_Pbase<string mnemonic, bit isNot,
}
multiclass ALU32_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
}
}
@@ -185,7 +118,7 @@ let InputType = "reg" in
multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in {
let isPredicable = 1 in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = "#mnemonic#"($src1, $src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
@@ -211,33 +144,35 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
// ALU32/ALU (ADD with register-immediate form)
//===----------------------------------------------------------------------===//
multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s8Imm: $src3),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, #$src3)",
[]>;
}
multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
}
}
-let InputType = "imm" in
+let isExtendable = 1, InputType = "imm" in
multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
- let isPredicable = 1 in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s16Imm:$src2),
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
+ isPredicable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Ext:$src2),
"$dst = "#mnemonic#"($src1, #$src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (s16ImmPred:$src2)))]>;
+ (s16ExtPred:$src2)))]>;
- let neverHasSideEffects = 1, isPredicated = 1 in {
+ let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, isPredicated = 1 in {
defm Pt : ALU32ri_Pred<mnemonic, 0>;
defm NotPt : ALU32ri_Pred<mnemonic, 1>;
}
@@ -246,23 +181,26 @@ multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "OR", InputType = "imm" in
def OR_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Imm:$src2),
+ (ins IntRegs:$src1, s10Ext:$src2),
"$dst = or($src1, #$src2)",
[(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
- s10ImmPred:$src2))]>;
+ s10ExtPred:$src2))]>, ImmRegRel;
def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1),
"$dst = not($src1)",
[(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+InputType = "imm", CextOpcode = "AND" in
def AND_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Imm:$src2),
+ (ins IntRegs:$src1, s10Ext:$src2),
"$dst = and($src1, #$src2)",
[(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
- s10ImmPred:$src2))]>;
-
+ s10ExtPred:$src2))]>, ImmRegRel;
// Negate.
def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = neg($src1)",
@@ -274,27 +212,138 @@ def NOP : ALU32_rr<(outs), (ins),
[]>;
// Rd32=sub(#s10,Rs32)
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "SUB", InputType = "imm" in
def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins s10Imm:$src1, IntRegs:$src2),
+ (ins s10Ext:$src1, IntRegs:$src2),
"$dst = sub(#$src1, $src2)",
- [(set IntRegs:$dst, (sub s10ImmPred:$src1, IntRegs:$src2))]>;
-
-// Transfer immediate.
-let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
-def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
- "$dst = #$src1",
- [(set (i32 IntRegs:$dst), s16ImmPred:$src1)]>;
-
-// Transfer register.
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1",
- []>;
+ [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
+ ImmRegRel;
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR64 : ALU32_ri<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = $src1",
- []>;
+
+multiclass TFR_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
+ []>;
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
+ []>;
+ }
+}
+
+let InputType = "reg", neverHasSideEffects = 1 in
+multiclass TFR_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let isPredicable = 1 in
+ def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+ let isPredicated = 1 in {
+ defm Pt : TFR_Pred<0>;
+ defm NotPt : TFR_Pred<1>;
+ }
+ }
+}
+
+class T_TFR64_Pred<bit PredNot, bit isPredNew>
+ : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#
+ !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []>
+{
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b1101;
+ let Inst{13} = isPredNew;
+ let Inst{7} = PredNot;
+ let Inst{4-0} = dst;
+ let Inst{6-5} = src1;
+ let Inst{20-17} = src2{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src2{4-1};
+ let Inst{8} = 0b0;
+}
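One reading of the encoding above (inferred from the bit layout, not spelled out in the patch): a double register D<n> is the pair R<2n+1>:R<2n>, src2{4-1} carries n, and the hard-wired bits 16 and 8 select the odd and even halves, so the 64-bit transfer is encoded like a combine of its two 32-bit halves:

    // Hypothetical helpers: 32-bit register numbers for a D<n> pair.
    static unsigned highHalfReg(unsigned PairIdx) { return 2 * PairIdx + 1; }
    static unsigned lowHalfReg(unsigned PairIdx) { return 2 * PairIdx; }
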
+
+multiclass TFR64_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : T_TFR64_Pred<PredNot, 0>;
+
+ let PNewValue = "new" in
+ def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
+ }
+}
+
+let neverHasSideEffects = 1 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
+ let isPredicable = 1 in
+ def NAME : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1),
+ "$dst = $src1" > {
+ bits<5> dst;
+ bits<5> src1;
+
+ let IClass = 0b1111;
+ let Inst{27-23} = 0b01010;
+ let Inst{4-0} = dst;
+ let Inst{20-17} = src1{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src1{4-1};
+ let Inst{8} = 0b0;
+ }
+
+ let isPredicated = 1 in {
+ defm Pt : TFR64_Pred<0>;
+ defm NotPt : TFR64_Pred<1>;
+ }
+ }
+}
+
+multiclass TFRI_Pred<bit PredNot> {
+ let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
+ []>;
+
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
+ []>;
+ }
+}
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
+multiclass TFRI_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
+ let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16,
+ isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+ "$dst = #$src1",
+ [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
+
+ let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ defm Pt : TFRI_Pred<0>;
+ defm NotPt : TFRI_Pred<1>;
+ }
+ }
+}
+
+defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR64 : TFR64_base<"TFR64">, PredNewRel;
// Transfer control register.
let neverHasSideEffects = 1 in
@@ -311,17 +360,50 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// Combine.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = combine($src1, $src2)",
- []>;
-let neverHasSideEffects = 1 in
-def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, s8Imm:$src2),
- "$dst = combine(#$src1, #$src2)",
- []>;
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+ SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineRR :
+ SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2))))]>;
+
+// Rd=combine(Rt.[HL], Rs.[HL])
+class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1."# A #", $src2."# B #")", []>;
+
+let isPredicable = 1 in {
+ def COMBINE_hh : COMBINE_halves<"H", "H">;
+ def COMBINE_hl : COMBINE_halves<"H", "L">;
+ def COMBINE_lh : COMBINE_halves<"L", "H">;
+ def COMBINE_ll : COMBINE_halves<"L", "L">;
+}
+
+def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
+ (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
+ (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+ ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
// Mux.
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
@@ -330,66 +412,92 @@ def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
"$dst = vmux($src1, $src2, $src3)",
[]>;
+let CextOpcode = "MUX", InputType = "reg" in
def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst = mux($src1, $src2, $src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
-def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
IntRegs:$src3),
"$dst = mux($src1, #$src2, $src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- s8ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2,
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
- s8Imm:$src3),
+ s8Ext:$src3),
"$dst = mux($src1, $src2, #$src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- (i32 IntRegs:$src2),
- s8ImmPred:$src3)))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ s8ExtPred:$src3)))]>, ImmRegRel;
-def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
s8Imm:$src3),
"$dst = mux($src1, #$src2, #$src3)",
[(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- s8ImmPred:$src2,
+ s8ExtPred:$src2,
s8ImmPred:$src3)))]>;
-// Shift halfword.
-let isPredicable = 1 in
-def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = aslh($src1)",
- [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>;
+// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
+multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2)">,
+ Requires<[HasV4T]>;
+}
-let isPredicable = 1 in
-def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = asrh($src1)",
- [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>;
+multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+ }
+}
-// Sign extend.
-let isPredicable = 1 in
-def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxtb($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>;
+multiclass ALU32_2op_base<string mnemonic> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, neverHasSideEffects = 1 in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = "#mnemonic#"($src1)">;
+
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
+ neverHasSideEffects = 1 in {
+ defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>;
+ defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+ }
+ }
+}
-let isPredicable = 1 in
-def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxth($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>;
-
-// Zero extend.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxtb($src1)",
- []>;
+defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
+defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
+defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
+defm SXTH : ALU32_2op_base<"sxth">, PredNewRel;
+defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
+defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel;
+
+def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
+ (ASLH IntRegs:$src1)>;
+
+def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
+ (ASRH IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
+ (SXTB IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
+ (SXTH IntRegs:$src1)>;
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxth($src1)",
- []>;
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -400,98 +508,66 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// Conditional combine.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1 in {
def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedFalse = 1 in
def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1 in
def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1, isPredicatedFalse = 1 in
def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst = combine($src2, $src3)",
[]>;
+}
-// Conditional transfer.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = $src2",
- []>;
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if (!$src1) $dst = $src2",
- []>;
+def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2),
- "if ($src1) $dst = $src2",
- []>;
+def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2),
- "if (!$src1) $dst = $src2",
- []>;
+def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
- "if ($src1) $dst = #$src2",
- []>;
+def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if (!$src1) $dst = #$src2",
- []>;
+def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if ($src1.new) $dst = $src2",
- []>;
+def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if (!$src1.new) $dst = $src2",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if ($src1.new) $dst = #$src2",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if (!$src1.new) $dst = #$src2",
- []>;
-
-// Compare.
-defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
-defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
-defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
-defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
-defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
-defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
//===----------------------------------------------------------------------===//
// ALU32/PRED -
//===----------------------------------------------------------------------===//
@@ -608,11 +684,6 @@ def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Subtract halfword.
-// Transfer register.
-let neverHasSideEffects = 1 in
-def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = $src1",
- []>;
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
@@ -784,7 +855,7 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
// JR +
//===----------------------------------------------------------------------===//
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// Jump to address from register.
let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
@@ -818,241 +889,218 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
// LD +
//===----------------------------------------------------------------------===//
///
-/// Make sure that in post increment load, the first operand is always the post
-/// increment operand.
-///
-// Load doubleword.
-let isPredicable = 1 in
-def LDrid : LDInst<(outs DoubleRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memd($addr)",
- [(set (i64 DoubleRegs:$dst), (i64 (load ADDRriS11_3:$addr)))]>;
-
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s11_3Imm:$offset),
- "$dst = memd($src1+#$offset)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- s11_3ImmPred:$offset))))]>;
+// Load -- MEMri operand
+multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($addr)",
+ []>;
+}
-let neverHasSideEffects = 1 in
-def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memd(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
+multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-let neverHasSideEffects = 1 in
-def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memd(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid : LDInst2PI<(outs DoubleRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memd($src1++#$offset)",
- [],
- "$src1 = $dst2">;
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// Load doubleword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memd($addr)",
- []>;
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
+ "$dst = "#mnemonic#"($addr)",
+ []>;
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >;
+ defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memd($addr)",
- []>;
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel;
+ defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel;
+ defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel;
+ defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
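Each LD_MEMri instantiation above expands, through the Pred/Pbase multiclasses, into the same five opcodes the deleted defs spelled out by hand, and AddrModeRel ties them into the addressing-mode relation map. For LDriw, for instance, the generated set is:

    LDriw            $dst = memw($addr)          (predicable base form)
    LDriw_cPt        if ($src1) $dst = memw($addr)
    LDriw_cNotPt     if (!$src1) $dst = memw($addr)
    LDriw_cdnPt      if ($src1.new) $dst = memw($addr)
    LDriw_cdnNotPt   if (!$src1.new) $dst = memw($addr)
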
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1) $dst = memd($src2+#$src3)",
- []>;
+def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
+ (LDrib ADDRriS11_0:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1) $dst = memd($src2+#$src3)",
- []>;
+def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
+ (LDriub ADDRriS11_0:$addr) >;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
+ (LDrih ADDRriS11_1:$addr) >;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cNotPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
+ (LDriuh ADDRriS11_1:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memd($addr)",
- []>;
+def : Pat < (i32 (load ADDRriS11_2:$addr)),
+ (LDriw ADDRriS11_2:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memd($addr)",
- []>;
+def : Pat < (i64 (load ADDRriS11_3:$addr)),
+ (LDrid ADDRriS11_3:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1.new) $dst = memd($src2+#$src3)",
- []>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1.new) $dst = memd($src2+#$src3)",
+// Load - Base with Immediate offset addressing mode
+multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+#$src3)",
[]>;
+}
+multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
-// Load byte.
-let isPredicable = 1 in
-def LDrib : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memb($addr)",
- [(set (i32 IntRegs:$dst), (i32 (sextloadi8 ADDRriS11_0:$addr)))]>;
-
-// Load byte any-extend.
-def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
- (i32 (LDrib ADDRriS11_0:$addr)) >;
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
-// Indexed load byte.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrib_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst = memb($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- s11_0ImmPred:$offset))))]>;
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1, AddedComplexity = 20 in
+ def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1+#$offset)",
+ []>;
-// Indexed load byte any-extend.
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >;
+ defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1 in
-def LDrib_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memb(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
+let addrMode = BaseImmOffset in {
+ defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext,
+ 13, 8>, AddrModeRel;
+ defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext,
+ 14, 9>, AddrModeRel;
+}
-let neverHasSideEffects = 1 in
-def LDb_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memb(#$global)",
- []>,
- Requires<[NoV4T]>;
+let AddedComplexity = 20 in {
+def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
-let neverHasSideEffects = 1 in
-def LDub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memub(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memb($src1++#$offset)",
- [],
- "$src1 = $dst2">;
+def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
-// Load byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memb($addr)",
- []>;
+def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memb($addr)",
- []>;
+def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1) $dst = memb($src2+#$src3)",
- []>;
+def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
+ (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1) $dst = memb($src2+#$src3)",
- []>;
+def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
+ (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
+}
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+//===----------------------------------------------------------------------===//
+// Post-increment loads.
+// In a post-increment load, the first operand must always be the
+// post-increment operand.
+//===----------------------------------------------------------------------===//
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memb($src2++#$src3)",
+multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
[],
"$src2 = $dst2">;
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memb($addr)",
- []>;
+multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memb($addr)",
- []>;
+multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1.new) $dst = memb($src2+#$src3)",
- []>;
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+ let isPredicated = 1 in {
+ defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1.new) $dst = memb($src2+#$src3)",
- []>;
+let hasCtrlDep = 1, neverHasSideEffects = 1 in {
+ defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>,
+ PredNewRel;
+ defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>,
+ PredNewRel;
+}
+def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
-// Load halfword.
-let isPredicable = 1 in
-def LDrih : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memh($addr)",
- [(set (i32 IntRegs:$dst), (i32 (sextloadi16 ADDRriS11_1:$addr)))]>;
+// Load byte any-extend.
+def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrih_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst = memh($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- s11_1ImmPred:$offset))))]>;
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
(i32 (LDrih ADDRriS11_1:$addr))>;
@@ -1061,399 +1109,25 @@ let AddedComplexity = 20 in
def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
(i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
-let neverHasSideEffects = 1 in
-def LDrih_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memuh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1) $dst = memh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1) $dst = memh($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1.new) $dst = memh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1.new) $dst = memh($src2+#$src3)",
- []>;
-
-// Load unsigned byte.
-let isPredicable = 1 in
-def LDriub : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memub($addr)",
- [(set (i32 IntRegs:$dst), (i32 (zextloadi8 ADDRriS11_0:$addr)))]>;
-
+let AddedComplexity = 10 in
def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (LDriub ADDRriS11_0:$addr))>;
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriub_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst = memub($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ImmPred:$offset))))]>;
-
let AddedComplexity = 20 in
def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
(i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
-let neverHasSideEffects = 1 in
-def LDriub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memub(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memub($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load unsigned byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1) $dst = memub($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1) $dst = memub($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1.new) $dst = memub($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1.new) $dst = memub($src2+#$src3)",
- []>;
-
-// Load unsigned halfword.
-let isPredicable = 1 in
-def LDriuh : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memuh($addr)",
- [(set (i32 IntRegs:$dst), (i32 (zextloadi16 ADDRriS11_1:$addr)))]>;
-
-// Indexed load unsigned halfword.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst = memuh($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ImmPred:$offset))))]>;
-
-let neverHasSideEffects = 1 in
-def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memuh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memuh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load unsigned halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1) $dst = memuh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1) $dst = memuh($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1.new) $dst = memuh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1.new) $dst = memuh($src2+#$src3)",
- []>;
-
-
-// Load word.
-let isPredicable = 1 in
-def LDriw : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr), "$dst = memw($addr)",
- [(set IntRegs:$dst, (i32 (load ADDRriS11_2:$addr)))]>;
-
// Load predicate.
-let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
-def LDriw_pred : LDInst<(outs PredRegs:$dst),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def LDriw_pred : LDInst2<(outs PredRegs:$dst),
(ins MEMri:$addr),
"Error; should not emit",
[]>;
-// Indexed load.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriw_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_2Imm:$offset),
- "$dst = memw($src1+#$offset)",
- [(set IntRegs:$dst, (i32 (load (add IntRegs:$src1,
- s11_2ImmPred:$offset))))]>;
-
-let neverHasSideEffects = 1 in
-def LDriw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memw(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memw(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memw($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load word conditionally.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1) $dst = memw($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1) $dst = memw($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1.new) $dst = memw($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1.new) $dst = memw($src2+#$src3)",
- []>;
-
// Deallocate stack frame.
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
- def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1),
+ def DEALLOCFRAME : LDInst2<(outs), (ins),
"deallocframe",
[]>;
}
@@ -1482,57 +1156,65 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// Multiply and use lower result.
// Rd=+mpyi(Rs,#u8)
-def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2),
"$dst =+ mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- u8ImmPred:$src2))]>;
+ u8ExtPred:$src2))]>;
// Rd=-mpyi(Rs,#u8)
-def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
"$dst =- mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- n8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1),
+ u8ImmPred:$src2)))]>;
// Rd=mpyi(Rs,#m9)
// s9 is NOT the same as m9 - but it works... so far.
// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
// depending on the value of m9. See Arch Spec.
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
+CextOpcode = "MPYI", InputType = "imm" in
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ImmPred:$src2))]>;
+ s9ExtPred:$src2))]>, ImmRegRel;
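+
+// Reading aid for the extender fields used throughout this section:
+// isExtendable marks an instruction whose immediate may be replaced by a
+// constant extender, opExtendable is the index of that operand ($src2 here),
+// and isExtentSigned/opExtentBits give the range encodable without an
+// extender (a signed 9-bit field for MPYI_ri); with an immext prefix the
+// operand is assumed to widen to a full 32-bit immediate.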
// Rd=mpyi(Rs,Rt)
+let CextOpcode = "MPYI", InputType = "reg" in
def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyi($src1, $src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ (i32 IntRegs:$src2)))]>, ImmRegRel;
// Rx+=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8,
+CextOpcode = "MPYI_acc", InputType = "imm" in
def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst += mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3),
+ (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx+=mpyi(Rs,Rt)
+let CextOpcode = "MPYI_acc", InputType = "reg" in
def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpyi($src2, $src3)",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx-=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in
def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst -= mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u8ImmPred:$src3)))],
+ u8ExtPred:$src3)))],
"$src1 = $dst">;
// Multiply and use upper result.
@@ -1601,7 +1283,7 @@ def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Rxx-=mpyu(Rs,Rt)
def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += mpyu($src2, $src3)",
+ "$dst -= mpyu($src2, $src3)",
[(set (i64 DoubleRegs:$dst),
(sub (i64 DoubleRegs:$src1),
(mul (i64 (anyext (i32 IntRegs:$src2))),
@@ -1609,37 +1291,43 @@ def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
"$src1 = $dst">;
+let InputType = "reg", CextOpcode = "ADD_acc" in
def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst += add($src2, $src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+InputType = "imm", CextOpcode = "ADD_acc" in
def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst += add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
- s8ImmPred:$src3),
+ s8_16ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let CextOpcode = "SUB_acc", InputType = "reg" in
def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst -= add($src2, $src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "SUB_acc", InputType = "imm" in
def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst -= add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
(add (i32 IntRegs:$src2),
- s8ImmPred:$src3)))],
- "$src1 = $dst">;
+ s8_16ExtPred:$src3)))],
+ "$src1 = $dst">, ImmRegRel;
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
@@ -1670,282 +1358,219 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
// ST +
//===----------------------------------------------------------------------===//
///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-/// post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-/// last operand.
-///
// Store doubleword.
-let isPredicable = 1 in
-def STrid : STInst<(outs),
- (ins MEMri:$addr, DoubleRegs:$src1),
- "memd($addr) = $src1",
- [(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr)]>;
-
-// Indexed store double word.
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
- "memd($src1+#$src2) = $src3",
- [(store (i64 DoubleRegs:$src3),
- (add (i32 IntRegs:$src1), s11_3ImmPred:$src2))]>;
-let neverHasSideEffects = 1 in
-def STrid_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+//===----------------------------------------------------------------------===//
+// Post increment store
+//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
-def STd_GP : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STdri : STInstPI<(outs IntRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memd($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2),
- s4_3ImmPred:$offset))],
+multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3",
+ [],
"$src2 = $dst">;
+}
-// Store doubleword conditionally.
-// if ([!]Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if ($src1) memd($addr) = $src2",
- []>;
+multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if (!$src1) memd($addr) = $src2",
- []>;
+let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if ($src1) memd($src2+#$src3) = $src4",
- []>;
+ let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2",
+ [],
+ "$src1 = $dst">;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if (!$src1) memd($src2+#$src3) = $src4",
- []>;
+defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
-// if ([!]Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">;
-
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1, isPredicated = 1,
- isPredicated = 1 in
-def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">;
+let isNVStorable = 0 in
+defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
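+
+// Sketch of the expected expansion, mirroring the load multiclasses above:
+// POST_STbri fans out into POST_STbri, POST_STbri_cPt, POST_STbri_cNotPt
+// and the V4-only POST_STbri_cdnPt_V4 / POST_STbri_cdnNotPt_V4. memd keeps
+// isNVStorable = 0 since there is no new-value form of a 64-bit store.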
+def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_0ImmPred:$offset),
+ (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
-// Store byte.
-// memb(Rs+#s11:0)=Rt
-let isPredicable = 1 in
-def STrib : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memb($addr) = $src1",
- [(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
- "memb($src1+#$src2) = $src3",
- [(truncstorei8 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
- s11_0ImmPred:$src2))]>;
-
-// memb(gp+#u16:0)=Rt
-let neverHasSideEffects = 1 in
-def STrib_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_1ImmPred:$offset),
+ (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-// memb(#global)=Rt
-let neverHasSideEffects = 1 in
-def STb_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-// memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2,
- s4Imm:$offset),
- "memb($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_0ImmPred:$offset))],
- "$src2 = $dst">;
+def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
+ (POST_STwri IntRegs:$src2, s4_2ImmPred:$offset, IntRegs:$src1)>;
-// Store byte conditionally.
-// if ([!]Pv) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memb($addr) = $src2",
- []>;
+def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memb($addr) = $src2",
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with MEMri operand.
+//===----------------------------------------------------------------------===//
+multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2",
[]>;
+}
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1) memb($src2+#$src3) = $src4",
- []>;
+multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1) memb($src2+#$src3) = $src4",
- []>;
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-// if ([!]Pv) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// if (!Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src",
+ []>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
-// Store halfword.
-// memh(Rs+#s11:1)=Rt
-let isPredicable = 1 in
-def STrih : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memh($addr) = $src1",
- [(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr)]>;
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+ let isNVStorable = 0 in
+ defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
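+
+// The two trailing template arguments are extent widths: ImmBits is the
+// signed offset range of the base form (11 bits scaled by 0/1/2/3 for
+// b/h/w/d, i.e. #s11:0 through #s11:3) and PredImmBits the unsigned range
+// of the predicated forms (#u6:0 through #u6:3). This is a reading of the
+// opExtentBits assignments above, not an extra constraint.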
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
- "memh($src1+#$src2) = $src3",
- [(truncstorei16 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
- s11_1ImmPred:$src2))]>;
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>;
-let neverHasSideEffects = 1 in
-def STrih_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
+ (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>;
-let neverHasSideEffects = 1 in
-def STh_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
+ (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>;
-// memh(Rx++#s4:1)=Rt.H
-// memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memh($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_1ImmPred:$offset))],
- "$src2 = $dst">;
+def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
+ (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>;
-// Store halfword conditionally.
-// if ([!]Pv) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memh($addr) = $src2",
- []>;
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memh($addr) = $src2",
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with base+immediate offset
+// addressing mode
+//===----------------------------------------------------------------------===//
+multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4",
[]>;
+}
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1) memh($src2+#$src3) = $src4",
- []>;
+multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+ defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1) memh($src2+#$src3) = $src4",
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3",
[]>;
-// if ([!]Pv) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
+ defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "reg" in {
+ defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel;
+ defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel;
+ defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel;
+ let isNVStorable = 0 in
+ defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+ u6_3Ext, 14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_0ExtPred:$offset)),
+ (STrib_indexed IntRegs:$src2, s11_0ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_1ExtPred:$offset)),
+ (STrih_indexed IntRegs:$src2, s11_1ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
-// if (!Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_2ExtPred:$offset)),
+ (STriw_indexed IntRegs:$src2, s11_2ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
+def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
+ s11_3ExtPred:$offset)),
+ (STrid_indexed IntRegs:$src2, s11_3ExtPred:$offset,
+ (i64 DoubleRegs:$src1))>;
+}
+
+// memh(Rx++#s4:1)=Rt.H
// Store word.
// Store predicate.
@@ -1955,90 +1580,6 @@ def STriw_pred : STInst2<(outs),
"Error; should not emit",
[]>;
-// memw(Rs+#s11:2)=Rt
-let isPredicable = 1 in
-def STriw : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1",
- [(store (i32 IntRegs:$src1), ADDRriS11_2:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3",
- [(store (i32 IntRegs:$src3),
- (add (i32 IntRegs:$src1), s11_2ImmPred:$src2))]>;
-
-let neverHasSideEffects = 1 in
-def STriw_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def STw_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memw($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_2ImmPred:$offset))],
- "$src2 = $dst">;
-
-// Store word conditionally.
-// if ([!]Pv) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memw($addr) = $src2",
- []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memw($addr) = $src2",
- []>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1) memw($src2+#$src3) = $src4",
- []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1) memw($src2+#$src3) = $src4",
- []>;
-
-// if ([!]Pv) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-
-
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
def ALLOCFRAME : STInst2<(outs),
@@ -2241,7 +1782,7 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
[SDNPHasChain]>;
-let hasSideEffects = 1, isHexagonSolo = 1 in
+let hasSideEffects = 1, isSolo = 1 in
def BARRIER : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)]>;
@@ -2316,9 +1857,9 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
}
// Support for generating global address.
@@ -2406,6 +1947,10 @@ def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
+// Map BlockAddress lowering to CONST32_Int_Real
+def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
+ (CONST32_Int_Real tblockaddress:$addr)>;
+
let isReMaterializable = 1, isMoveImm = 1 in
def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst = CONST32($label)",
@@ -2509,68 +2054,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
// Atomic load and store support
// 8 bit atomic load
-def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
(i32 (LDriub ADDRriS11_0:$src1))>;
def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
(i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-
// 16 bit atomic load
-def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
(i32 (LDriuh ADDRriS11_1:$src1))>;
def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
(i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-
-// 32 bit atomic load
-def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
(i32 (LDriw ADDRriS11_2:$src1))>;
def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
(i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
// 64 bit atomic load
-def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
(i64 (LDrid ADDRriS11_3:$src1))>;
@@ -2578,30 +2081,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
(i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-// 64 bit atomic store
-def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// 8 bit atomic store
-def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
(STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
@@ -2611,18 +2090,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
(i32 IntRegs:$src1))>;
-// 16 bit atomic store
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
(STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
@@ -2631,20 +2098,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
(STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
(i32 IntRegs:$src1))>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
(STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
@@ -2713,198 +2166,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
(i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
-// Map from store(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd.
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDrih_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDriuh_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a
-// certain global variable takes only two constant values, it shrinks the
-// global to a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
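+
+// The and(#1) mask keeps only bit 0 of the loaded byte, matching the
+// byte-aligned i1 assumption stated above.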
@@ -3020,12 +2283,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
(STrib ADDRriS11_2:$addr, (TFRI 1))>;
-let AddedComplexity = 100 in
-// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
-// memw(#foo) = r0
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (TFRI 1))>,
- Requires<[NoV4T]>;
// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
@@ -3181,23 +2438,54 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
// i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// i1 -> i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
@@ -3218,15 +2506,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+// When there is an offset, prefer the pattern below over the one above.
+// The complexity of the pattern above is 13 (gleaned from
+// HexagonGenDAGISel.inc), so the complexity below is set comfortably higher
+// to make sure the indexed form is chosen. Otherwise we generate address
+// sequences such as
+//   r1 = add(r0, #4)
+//   r1 = memw(r1 + #0)
+// instead of
+//   r1 = memw(r0 + #4)
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
@@ -3331,6 +2645,11 @@ def BR_JT : JRInst<(outs), (ins IntRegs:$src),
"jumpr $src",
[(HexagonBR_JT (i32 IntRegs:$src))]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BRIND : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(brind (i32 IntRegs:$src))]>;
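+
+// BRIND shares the jumpr encoding with BR_JT above; keeping a separate def
+// lets generic (brind ...) selection succeed without the HexagonBR_JT
+// wrapper used for jump tables.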
+
def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
def : Pat<(HexagonWrapperJT tjumptable:$dst),
@@ -3444,32 +2763,3 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
// V5 Instructions -
//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate non-predicate instructions with their
-// predicated formats - true and false.
-//
-
-def getPredOpcode : InstrMapping {
- let FilterClass = "PredRel";
- // Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
- // Instructions with the same predicate sense form a column.
- let ColFields = ["PredSense"];
- // The key column is the unpredicated instructions.
- let KeyCol = [""];
- // Value columns are PredSense=true and PredSense=false
- let ValueCols = [["true"], ["false"]];
-}
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate predicated instructions with their .new
-// format.
-//
-def getPredNewOpcode : InstrMapping {
- let FilterClass = "PredNewRel";
- let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
- let ColFields = ["PNewValue"];
- let KeyCol = [""];
- let ValueCols = [["new"]];
-}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 70448fc7af38..cd0e4758968c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -12,10 +12,25 @@
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in
-def IMMEXT : Immext<(outs), (ins),
- "/* immext #... */",
- []>,
- Requires<[HasV4T]>;
+class T_Immext<dag ins> :
+ EXTENDERInst<(outs), ins, "immext(#$imm)", []>,
+ Requires<[HasV4T]>;
+
+def IMMEXT_b : T_Immext<(ins brtarget:$imm)>;
+def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
+def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
+def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
+
+// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
+
+// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
+
+def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
+ (HexagonCONST32 node:$addr), [{
+ return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
+}]>;
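+
+// A minimal usage sketch (hypothetical instruction name, assuming the
+// selector callbacks above): a ComplexPattern like FoldGlobalAddrGP would
+// appear as the address leaf of a selection pattern, e.g.
+//   def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+//             (LDriw_abs_V4 FoldGlobalAddrGP:$addr)>;
+// letting foldGlobalAddressGP() fold the constant offset into the global
+// address operand.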
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
@@ -80,190 +95,63 @@ def IMMEXT : Immext<(outs), (ins),
//===----------------------------------------------------------------------===//
// ALU32 +
//===----------------------------------------------------------------------===//
-
-// Shift halfword.
-
-let isPredicated = 1 in
-def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-// Sign extend.
-
-let isPredicated = 1 in
-def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicated = 1 in
-def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-// Zero exten.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
// Generate frame index addresses.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let neverHasSideEffects = 1, isReMaterializable = 1,
+    isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s32Imm:$offset),
"$dst = add($src1, ##$offset)",
[]>,
Requires<[HasV4T]>;
+// Rd=cmp.eq(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+    isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Preserve the TSTBIT generation
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ 1, 0))>;
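+// For example (a sketch): C code like "(x & (1 << b)) != 0 ? 1 : 0" selects
+// through this pattern to "p0 = tstbit(r0, r1); r0 = mux(p0, #1, #0)"
+// instead of materializing the mask and comparing.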
+
+// The cmp.ne pattern below interfered with tstbit generation; the pattern
+// above preserves it (see tstbit.ll).
+// Rd=cmp.ne(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+    isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = !cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.ne(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = !cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// ALU32 -
@@ -276,19 +164,44 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
// Combine
// Rdd=combine(Rs, #s8)
-let neverHasSideEffects = 1 in
-def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Ext:$src2),
"$dst = combine($src1, #$src2)",
[]>,
Requires<[HasV4T]>;
+
// Rdd=combine(#s8, Rs)
-let neverHasSideEffects = 1 in
-def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, IntRegs:$src2),
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, IntRegs:$src2),
"$dst = combine(#$src1, $src2)",
[]>,
Requires<[HasV4T]>;
+
+def HexagonWrapperCombineRI_V4 :
+ SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineIR_V4 :
+ SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+
+def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
+ (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
+ (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
+ Requires<[HasV4T]>;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, u6Ext:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+
//===----------------------------------------------------------------------===//
// ALU32/PERM +
//===----------------------------------------------------------------------===//
@@ -300,1436 +213,310 @@ def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
// These absolute-set addressing mode instructions accept an immediate as
// the operand. The patterns are duplicated below to take a global address.
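// For example (a sketch of the semantics): "r1 = memw(r0=##0x1000)" loads a
// word from absolute address 0x1000 and also writes that address into r0.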
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+    validSubTargets = HasV4SubT in {
def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memd($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memb($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memub($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memuh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memw($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
+}
// The following patterns are defined for absolute-set addressing mode
// instructions which take a global address as the operand.
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+    validSubTargets = HasV4SubT in {
def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
+}
-// Load doubleword.
-//
-// Make sure that in post increment load, the first operand is always the post
-// increment operand.
-//
-// Rdd=memd(Rs+Rt<<#u2)
-// Special case pattern for indexed load without offset which is easier to
-// match. AddedComplexity of this pattern should be lower than base+offset load
-// and lower yet than the more generic version with offset/shift below
-// Similar approach is taken for all other base+index loads.
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memd($src1+$src2<<#0)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memd($src1+$src2<<#$offset)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load doubleword conditionally.
-// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rdd=memd(Rt<<#u2+#U6)
-
-//// Load byte.
-// Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memb($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memb($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memub($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memub($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load byte conditionally.
-// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned byte conditionally.
-// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memb(Rt<<#u2+#U6)
-
-//// Load halfword
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memuh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memuh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load halfword conditionally.
-// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned halfword conditionally.
-// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Rt<<#u2+#U6)
-
-//// Load word.
-// Load predicate: Fix for bug 5279.
-let neverHasSideEffects = 1 in
-def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst),
- (ins MEMri:$addr),
- "Error; should not emit",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Re=#U6)
-
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memw($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memw($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (load (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load word conditionally.
-// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
-// if (Pv) Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Rt<<#u2+#U6)
-
-
-// Post-inc Load, Predicated, Dot new
-
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnNotPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-/// Load from global offset
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memd(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memb(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memub(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memuh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memw(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memd(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memb(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memub(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memuh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+// Multiclass for load instructions with the base + register-offset
+// addressing mode.
+multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
+ []>, Requires<[HasV4T]>;
+}
-// if (!Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+let neverHasSideEffects = 1 in
+multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
+ []>, Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+    defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
+ }
+ }
+}
-// if (!Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseRegOffset in {
+ defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
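+
+// For reference (a sketch of the expansion, not verbatim TableGen output):
+// "defm LDrib_indexed_shl" above produces the base instruction
+// LDrib_indexed_shl_V4 plus the predicated variants
+// LDrib_indexed_shl_cPt_V4, LDrib_indexed_shl_cNotPt_V4,
+// LDrib_indexed_shl_cdnPt_V4 and LDrib_indexed_shl_cdnNotPt_V4,
+// all tied together via AddrModeRel.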
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memw(#$global)",
- []>,
+// Selection patterns for load instructions with base + register offset and a
+// non-zero immediate value; the immediate is used to left-shift the second
+// register operand.
+let AddedComplexity = 40 in {
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
- Requires<[HasV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i64 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
+}
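+// Illustrative example (editor's sketch, not part of the patch): with the
+// patterns above, an address computed as base + (index << 2) feeding a
+// 32-bit load now selects to the single base+register-offset form
+//   r0 = memw(r1+r2<<#2)
+// instead of materializing the shifted address in separate instructions.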
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+// 'def : Pat' selection patterns for load instructions with base + register
+// offset addressing and a zero shift amount, i.e. a plain register +
+// register address (an illustrative example follows the block below).
+let AddedComplexity = 10 in {
+def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
+}
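+// Illustrative example (editor's sketch): a plain register + register
+// address reuses the shifted form with a shift count of zero, e.g.
+//   r0 = memw(r1+r2<<#0)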
-def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+// zext i1->i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
+ Requires<[HasV4T]>;
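+// Editor's note (illustrative): COMBINE_Ir_V4 packs an immediate and a
+// register into a 64-bit register pair, so the pattern above selects to
+// something like
+//   r1:0 = combine(#0, r0)
+// zeroing the high word while the low word carries the source value.
+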
+// zext i8->i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zextload i1->i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i16->i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// anyext i16->i64
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zextload i32->i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memh(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+// anyext i32->i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memuh(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
@@ -1747,80 +534,192 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
/// last operand.
///
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, u6Imm:$src2),
- "memd($dst1=#$src2) = $src1",
+ (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
+ "memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memb($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memh($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memw($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
+}
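+
+// Editor's note (illustrative): the '##' in the syntax marks a
+// constant-extended immediate; the value is carried in an extra extender
+// word of the instruction packet, so the full 32-bit range is available,
+// e.g. memw(r0=##0x12345678) = r1.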
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, globaladdress:$src2),
+ (ins DoubleRegs:$src1, globaladdressExt:$src2),
"memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
+}
-// memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
- "memd($src1+$src2<<#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
+// multiclass for store instructions with base + register offset (shifted
+// index) addressing mode.
+multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5",
+ []>,
Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isNVStorable = 1 in
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
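+
+// Sketch (editor's illustration) of what a 'defm' of this multiclass expands
+// to, following the NAME concatenation above: a defm named STrib_indexed_shl
+// produces STrib_indexed_shl_V4 plus the predicated variants
+// STrib_indexed_shl_{cPt,cdnPt,cNotPt,cdnNotPt}_V4, matching the
+// hand-written definitions this patch removes.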
+
+// multiclass for new-value store instructions with base + register offset
+// addressing mode.
+multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseRegOffset, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+ defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>,
+ ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+
+ defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>,
+ ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+
+ defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>,
+ ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+
+ let isNVStorable = 0 in
+ defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
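+
+// Likewise (editor's sketch), the new-value multiclass adds
+// STrib_indexed_shl_nv_V4 and the predicated forms
+// STrib_indexed_shl_{cPt,cdnPt,cNotPt,cdnNotPt}_nv_V4, so a single 'defm'
+// per access size now covers both the plain and the '.new' store variants.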
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i64 DoubleRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, DoubleRegs:$src4)>;
+}
// memd(Ru<<#u2+#U6)=Rtt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
+validSubTargets = HasV4SubT in
def STrid_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
"memd($src1<<#$src2+#$src3) = $src4",
[(store (i64 DoubleRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memd(Rx++#s4:3)=Rtt
@@ -1834,143 +733,81 @@ def STrid_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(#u6)=Rtt
// TODO: needs to be implemented.
-// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if ($src1.new) memd($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if (!$src1.new) memd($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if ($src1.new) memd($src2+#$src3) = $src4",
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and an immediate stored value.
+// mem[bhw](Rs+#u6:[012])=#S8
+// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
+//===----------------------------------------------------------------------===//
+multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if (!$src1.new) memd($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+ }
+}
-// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
-// if (Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
+multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+ let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
+ mnemonic#"($src1+#$src2) = #$src3",
[]>,
Requires<[HasV4T]>;
-// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
+ defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
+ defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1>;
+ }
+ }
+}
-// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, InputType = "imm",
+ validSubTargets = HasV4SubT in {
+ defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+ defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+ defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
+}
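+
+// Editor's note: the ImmRegRel and PredNewRel annotations (like AddrModeRel
+// above) populate TableGen instruction-relation maps, which later passes use
+// to switch between immediate/register, predicated/predicate-new, and
+// addressing-mode variants of the same store without hand-written tables.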
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-// if (!Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
+ (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2)),
+ (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
-// Store byte.
-// memb(Rs+#u6:0)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
- "memb($src1+#$src2) = #$src3",
- [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_0ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
+ (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+}
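+
+// Illustrative selection (editor's sketch): storing a small constant through
+// the patterns above, e.g. an IR store of 100 to base+8, maps to
+// STriw_imm_V4 and prints as
+//   memw(r0+#8) = #100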
-// memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memb($src1+$src2<<#$src3) = $src4",
- [(truncstorei8 (i32 IntRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
// memb(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrib_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memb($src1<<#$src2+#$src3) = $src4",
[(truncstorei8 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memb(Rx++#s4:0:circ(Mu))=Rt
@@ -1980,185 +817,28 @@ def STrib_shl_V4 : STInst<(outs),
// memb(gp+#u16:0)=Rt
-// Store byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Rt
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
-// if (Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if ($src1) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if (!$src1) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memb($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memb($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memb($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memb($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
-// if (Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-// if (Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Rt
-// if (!Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store halfword.
// TODO: needs to be implemented
// memh(Re=#U6)=Rt.H
// memh(Rs+#s11:1)=Rt.H
-// memh(Rs+#u6:1)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
- "memh($src1+#$src2) = #$src3",
- [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_1ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
// memh(Rs+Ru<<#u2)=Rt.H
// TODO: needs to be implemented.
-// memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memh($src1+$src2<<#$src3) = $src4",
- [(truncstorei16 (i32 IntRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
-
// memh(Ru<<#u2+#U6)=Rt.H
// memh(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrih_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4",
[(truncstorei16 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memh(Rx++#s4:1:circ(Mu))=Rt.H
@@ -2173,152 +853,13 @@ def STrih_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(#u6)=Rt.H
// if ([!]Pv[.new]) memh(#u6)=Rt
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
-// if (Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if ($src1) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if (!$src1) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
// TODO: needs to be implemented.
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memh($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Rt
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memh($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memh($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memh($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
-// if (Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
// TODO: Needs to be implemented.
-// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-// if (Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Rt
-// if (!Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store word.
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
@@ -2331,34 +872,20 @@ def STriw_pred_V4 : STInst2<(outs),
[]>,
Requires<[HasV4T]>;
-
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
- "memw($src1+#$src2) = #$src3",
- [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_2ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memw($src1+$src2<<#$src3) = $src4",
- [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
// memw(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STriw_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4",
[(store (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memw(Rx++#s4:2)=Rt
@@ -2366,592 +893,6 @@ def STriw_shl_V4 : STInst<(outs),
// memw(Rx++I:circ(Mu))=Rt
// memw(Rx++Mu)=Rt
// memw(Rx++Mu:brev)=Rt
-// memw(gp+#u16:2)=Rt
-
-
-// Store word conditionally.
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
-// if (Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if ($src1) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if (!$src1) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memw($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memw($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memw($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memw($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
-// if (Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-// if (Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-// if (!Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-/// store to global address
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrid_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrib_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrih_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STriw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memd(#global)=Rtt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STd_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memb(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STb_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memh(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STh_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memw(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-// to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-
//===----------------------------------------------------------------------===
// ST -
@@ -2962,853 +903,202 @@ def : Pat<(store (i32 IntRegs:$src1),
// NV/ST +
//===----------------------------------------------------------------------===//
-// Store new-value byte.
-
-// memb(Re=#U6)=Nt.new
-// memb(Rs+#s11:0)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memb($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STrib_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
- "memb($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memb($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
- "memb($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-// memb(Rx++#s4:0:circ(Mu))=Nt.new
-// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu)=Nt.new
-// memb(Rx++Mu:brev)=Nt.new
-
-// memb(gp+#u16:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src.new",
+// multiclass for new-value store instructions with base + immediate offset.
+//
+multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
+ Operand predImmOp, bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
+}
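+// For illustration, ST_Idxd_Pbase_nv<"memb", IntRegs, u6_0Ext, 1, 1> yields an
+// instruction whose assembly reads
+// "if (!$src1.new) memb($src2+#$src3) = $src4.new".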
-// Store new-value byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Nt.new
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
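+// The _c/_cdn prefixes distinguish the plain-predicate and predicate-new
+// variants, reproducing the hand-written names removed above (e.g.
+// STrib_indexed_cPt_nv_V4 vs. STrib_indexed_cdnPt_nv_V4).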
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in
+multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memb($addr) = $src2.new",
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3.new",
[]>,
Requires<[HasV4T]>;
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
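+// Note that the constant-extender metadata tracks the offset operand: it is
+// operand 1 in the predicable form, but operand 2 once the predicate register
+// is prepended to the input list.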
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
+ defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel;
+ defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel;
+ defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel;
+}
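+// Each defm above expands to one predicable base instruction plus four
+// predicated variants, regenerating e.g. STrib_indexed_nv_V4,
+// STrib_indexed_cPt_nv_V4, STrib_indexed_cdnPt_nv_V4,
+// STrib_indexed_cNotPt_nv_V4 and STrib_indexed_cdnNotPt_nv_V4.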
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memb($src2+#$src3) = $src4.new",
+// Multiclass for new-value store instructions with base + immediate offset
+// and MEMri operand.
+multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2.new",
[]>,
Requires<[HasV4T]>;
+}
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+ // Predicate new
+ defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src.new",
[]>,
Requires<[HasV4T]>;
-// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
-// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT,
+mayStore = 1 in {
+ defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+}
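+// As with the base+offset forms, each defm regenerates the MEMri variants
+// defined by hand above (STrib_nv_V4, STrib_cPt_nv_V4, ...,
+// STrib_cdnNotPt_nv_V4); AddrModeRel records them in the addressing-mode
+// relation tables via the shared BaseOpcode/CextOpcode keys.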
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+// memb(Ru<<#u2+#U6)=Nt.new
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
-// if (Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Post-increment store
+// mem[bhw](Rx++#s4:[012])=Nt.new
+//===----------------------------------------------------------------------===//
-// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
+multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
+ [],
+ "$src2 = $dst">,
Requires<[HasV4T]>;
+}
+multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-// Store new-value halfword.
-// memh(Re=#U6)=Nt.new
-// memh(Rs+#s11:1)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memh($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STrih_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
- "memh($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
+let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0>;
+ defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1>;
+ }
+ }
+}
-// memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memh($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+let validSubTargets = HasV4SubT in {
+defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+}
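+// For example, POST_STbri_nv_V4 prints as "memb($src1++#$offset) = $src2.new",
+// with the updated base register tied back through the "$src1 = $dst"
+// constraint.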
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
// memh(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STrih_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
- "memh($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memh(gp+#u16:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// memh(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-
-// Store new-value halfword conditionally.
-
-// if ([!]Pv[.new]) memh(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new
-// if (Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-// Store new-value word.
-
-// memw(Re=#U6)=Nt.new
-// memw(Rs+#s11:2)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STriw_nv_V4 : NVInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STriw_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memw($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STriw_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
- "memw($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2:circ(Mu))=Nt.new
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
// memw(Rx++Mu:brev)=Nt.new
-// memw(gp+#u16:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// Store new-value word conditionally.
-
-// if ([!]Pv[.new]) memw(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
-// if (Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// NV/ST -
@@ -3998,31 +1288,37 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
// Add and accumulate.
// Rd=add(Rs,add(Ru,#s6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3),
"$dst = add($src1, add($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
- s6ImmPred:$src3)))]>,
+ s6_16ExtPred:$src3)))]>,
Requires<[HasV4T]>;
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
(i32 IntRegs:$src3))))]>,
Requires<[HasV4T]>;
// Generates the same instruction as ADDr_SUBri_V4 but matches a different
// pattern.
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
(i32 IntRegs:$src3)))]>,
Requires<[HasV4T]>;
@@ -4036,6 +1332,7 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
// Logical doublewords.
// Rdd=and(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = and($src1, ~$src2)",
@@ -4044,6 +1341,7 @@ def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// Rdd=or(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = or($src1, ~$src2)",
@@ -4054,6 +1352,7 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
// Logical-logical doublewords.
// Rxx^=xor(Rss,Rtt)
+let validSubTargets = HasV4SubT in
def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -4066,17 +1365,20 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
// Logical-logical words.
// Rx=or(Ru,and(Rx,#s10))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT in
def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst = or($src1, and($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx[&|^]=and(Rs,Rt)
// Rx&=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, $src3)",
@@ -4087,6 +1389,7 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in
def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, $src3)",
@@ -4094,9 +1397,10 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, $src3)",
@@ -4108,6 +1412,7 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=and(Rs,~Rt)
// Rx&=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, ~$src3)",
@@ -4118,6 +1423,7 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, ~$src3)",
@@ -4128,6 +1434,7 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, ~$src3)",
@@ -4139,6 +1446,7 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=or(Rs,Rt)
// Rx&=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= or($src2, $src3)",
@@ -4149,6 +1457,7 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=or(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= or($src2, $src3)",
@@ -4156,9 +1465,10 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= or($src2, $src3)",
@@ -4170,6 +1480,7 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=xor(Rs,Rt)
// Rx&=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= xor($src2, $src3)",
@@ -4180,6 +1491,7 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= xor($src2, $src3)",
@@ -4190,6 +1502,7 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -4200,24 +1513,28 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= and($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx|=or(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= or($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Modulo wrap
@@ -4264,25 +1581,41 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
// Multiply and use lower result.
// Rd=add(#u6,mpyi(Rs,#U6))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
"$dst = add(#$src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- u6ImmPred:$src1))]>,
+ u6ExtPred:$src1))]>,
Requires<[HasV4T]>;
-// Rd=add(#u6,mpyi(Rs,Rt))
+// Rd=add(##,mpyi(Rs,#U6))
+def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+ u6ImmPred:$src3))>;
+// Rd=add(#u6,mpyi(Rs,Rt))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add(#$src1, mpyi($src2, $src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- u6ImmPred:$src1))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src1))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rd=add(##,mpyi(Rs,Rt))
+def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+ IntRegs:$src3))>;
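+// The two anonymous patterns above fold a global address used as the addend
+// into the constant-extended immediate operand (the ## form of #u6).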
// Rd=add(Ru,mpyi(#u6:2,Rs))
+let validSubTargets = HasV4SubT in
def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi(#$src2, $src3))",
@@ -4292,15 +1625,18 @@ def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rd=add(Ru,mpyi(Rs,#u6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
"$dst = add($src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u6ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src3)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi($src2, $src3))",
@@ -4308,7 +1644,7 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src2 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Polynomial multiply words
@@ -4351,92 +1687,107 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Shift by immediate and accumulate.
// Rx=add(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=add(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by immediate and logical.
// Rx=and(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=and(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,asl(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,lsr(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by register.
// Rd=lsl(#s6,Rt)
+let validSubTargets = HasV4SubT in {
def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
"$dst = lsl(#$src1, $src2)",
[(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
@@ -4484,7 +1835,7 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
-
+}
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT -
@@ -4494,488 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
// MEMOP: Word, Half, Byte
//===----------------------------------------------------------------------===//
+def MEMOPIMM : SDNodeXForm<imm, [{
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int32_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+ // -1 .. -31 are represented as 65535..65505;
+ // assigning to a short restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int16_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+ // -1 .. -31 are represented as 255..225;
+ // assigning to a char restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int8_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-31], as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-31], as an SDNode.
+ // We bit-negate the value first.
+ int32_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-15], as an SDNode.
+ int16_t imm = N->getSExtValue();
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-15], as an SDNode.
+ // We bit-negate the value first.
+ int16_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-7], as an SDNode.
+ int8_t imm = N->getSExtValue();
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-7], as an SDNode.
+ // We bit-negate the value first.
+ int8_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU3Imm(imm);
+}]>;
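+// Worked examples, assuming the Xform* helpers behave as described above:
+//   MEMOPIMM:  -5         -> 5  (so "+= -5" can be emitted as "-= #5")
+//   SETMEMIMM: 0x00000010 -> 4  (an OR with this mask becomes setbit(#4))
+//   CLRMEMIMM: 0xffffffef -> 4  (~mask = 0x10, so an AND becomes clrbit(#4))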
+
//===----------------------------------------------------------------------===//
-// MEMOP: Word
-//
-// Implemented:
-// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4<(outs),
+ (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+ opc#"($base+#$offset)"#memOp#"$delta",
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0, memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1110;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
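+// offsetBits drops the bits implied by the access size: for "memw"
+// (opcBits = 0b10), a byte offset of 24 is encoded as offset{7-2} = 6, i.e.
+// the u6:2 form.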
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the immediate value.
//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4 <(outs),
+ (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ opc#"($base+#$offset)"#memOp#"#$delta"
+ #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0, memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1111;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+// Multiclass to define MemOp instructions with a register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
-// MEMw_ADDSUBi_indexed_V4:
-// pseudo operation for MEMw_ADDi_indexed_V4 and
-// MEMw_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
- "memw($base+#$offset) += #$addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+// Multiclass to define MemOp instructions with an immediate operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+ def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
- "memw($base+#$offset) -= #$subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
- "memw($base+#$offset) += $addend",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
- "memw($base+#$offset) -= $subend",
- [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
- "memw($base+#$offset) &= $andend",
- [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
- "memw($base+#$offset) |= $orend",
- [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMw_ADDSUBi_V4:
-// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memw($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+ defm r : MemOp_rr <opc, opcBits, ImmOp>;
+ defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memw($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memw($addr) += $addend",
- [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memw($addr) -= $subend",
- [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memw($addr) &= $andend",
- [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memw($addr) |= $orend",
- [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
+validSubTargets = HasV4SubT in {
+ let opExtentBits = 6, accessSize = ByteAccess in
+ defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+ let opExtentBits = 7, accessSize = HalfWordAccess in
+ defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+ let opExtentBits = 8, accessSize = WordAccess in
+ defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
//===----------------------------------------------------------------------===//
-// MEMOP: Halfword
-//
-// Implemented:
-// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Multiclass to define 'def Pats' for ALU operations on memory.
+// Here the value used for the ALU operation is an immediate value.
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 180 in
+ def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
+
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ExtPred:$offset)),
+ (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+}
-// MEMh_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMh_ADDi_indexed_V4 and
-// MEMh_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
- "memh($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon addMI, InstHexagon subMI> {
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+}
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
- "memh($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
- "memh($base+#$offset) += $addend",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
- "memh($base+#$offset) -= $subend",
- [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
- "memh($base+#$offset) += $andend",
- [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
- "memh($base+#$offset) |= $orend",
- [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMh_ADDSUBi_V4:
-// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memh($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+}
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memh($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memh($addr) += $addend",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memh($addr) -= $subend",
- [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memh($addr) &= $andend",
- [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memh($addr) |= $orend",
- [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
+ MemOPw_SUBi_V4>;
+}
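+
+// As a rough sketch, C source of the following shape is what the u5-immediate
+// memop patterns above are intended to match (the function and pointer names
+// are hypothetical):
+//
+//   void bump(unsigned char *p) {
+//     p[2] += 3;   // expected to become: memb(r0+#2) += #3
+//   }
+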
//===----------------------------------------------------------------------===//
-// MEMOP: Byte
-//
-// Implemented:
-// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
-// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
-//
-// Not implemented:
-// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
-// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// Multiclass to define 'def Pats' for ALU operations on memory.
+// Here the value used for the ALU operation is a negative immediate.
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ PatLeaf immPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI> {
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
+
+ let AddedComplexity = 195 in
+ def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+}
-// MEMb_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMb_ADDi_indexed_V4 and
-// MEMb_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
- "memb($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+}
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
- "memb($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
- "memb($base+#$offset) += $addend",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
- "memb($base+#$offset) -= $subend",
- [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
- "memb($base+#$offset) += $andend",
- [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
- "memb($base+#$offset) |= $orend",
- [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMb_ADDSUBi_V4:
-// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memb($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memb($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memb($addr) += $addend",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memb($addr) -= $subend",
- [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memb($addr) &= $andend",
- [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memb($addr) |= $orend",
- [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+ // Word
+ defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+}
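+
+// As a rough sketch, a subtract by a small constant reaches these patterns as
+// an add of a negative immediate, which the MEMOPIMM transforms re-encode as
+// a memop subtract (names are hypothetical):
+//
+//   void drop(int *p) {
+//     *p -= 4;     // expected to become: memw(r0+#0) -= #4
+//   }
+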
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+ PatLeaf extPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+
+ // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+ let AddedComplexity = 250 in
+ def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+ // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+ let AddedComplexity = 225 in
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+}
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Byte - clrbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+ // Byte - setbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+ // Half Word - clrbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+ // Half Word - setbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+ // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+
+ // memw(Rs+#0) = [clrbit|setbit](#U5)
+ // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
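+
+// As a rough sketch, single-bit mask updates are what the clrbit/setbit
+// patterns above are intended to match (names are hypothetical):
+//
+//   void flags(unsigned char *p) {
+//     *p &= ~(1 << 4);   // expected to become: memb(r0+#0) = clrbit(#4)
+//     *p |= (1 << 6);    // expected to become: memb(r0+#0) = setbit(#6)
+//   }
+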
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on memory
+// where the addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+ PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+ ComplexPattern addrPred, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI > {
+
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+ MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+ MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ // Byte
+ defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+ MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+ MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with a register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+ // Byte, Half Word
+ defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+ MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
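+
+// As a rough sketch, read-modify-write sequences with a register operand are
+// what the register-addend patterns above are intended to match (names are
+// hypothetical):
+//
+//   void acc(int *p, int x) {
+//     p[3] |= x;   // expected to become: memw(r0+#12) |= r1
+//   }
+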
//===----------------------------------------------------------------------===//
// XTYPE/PRED +
@@ -4992,7 +2222,61 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
-let isCompare = 1 in
+// p=!cmp.eq(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.eq(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,#u9)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u9Ext:$src2),
+ "$dst = !cmp.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
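+// As a rough sketch, these negated compares let a setne (or negated setgt)
+// condition produce a predicate directly (names are hypothetical; g is an
+// external function):
+//
+//   void g(void);
+//   void f(int a, int b) {
+//     if (a != b) g();   // expected to set up: p0 = !cmp.eq(r0, r1)
+//   }
+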
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
"$dst = cmpb.eq($src1, #$src2)",
@@ -5000,8 +2284,14 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
Requires<[HasV4T]>;
+def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ bb:$offset)>,
+ Requires<[HasV4T]>;
+
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -5011,7 +2301,7 @@ def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -5021,7 +2311,7 @@ def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gt($src1, $src2)",
@@ -5031,29 +2321,237 @@ def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmpb.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformU7ToU7M1Imm(imm);
+}]>;
+
+// For the sequence
+// zext( seteq ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( seteq (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 254), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setlt ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// This pattern interferes with CoreMark performance, so it is not implemented
+// at this time.
+// For the sequence
+// zext( setgt ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+
+// For the sequence
+// zext( setuge ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setge ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setule ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setle ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// use the isdigit transformation below.
+
+// Generate code of the form 'mux_ii(cmpbgtu(Rs, C-1), 0, 1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+// after biasing the expression by 48. We depend on the representation and
+// semantics of the unsigned types.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// the code is transformed upstream of LLVM into
+// retval = (c-48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
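+
+// A quick check of the identity behind the pattern above: for an unsigned
+// byte c, (unsigned)(c - 48) < 10 holds exactly for 48 <= c <= 57, because
+// values below 48 wrap around to large unsigned numbers. In C:
+//
+//   int is_digit(unsigned char c) {
+//     return (unsigned)(c - '0') < 10;   // same as (c >= '0') & (c <= '9')
+//   }
+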
// Pd=cmpb.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
+InputType = "reg" in
def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
(and (i32 IntRegs:$src2), 255)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// The following instruction is not extended, as extension results in
// incorrect code for negative numbers.
// Signed half compare(.eq) ri.
// Pd=cmph.eq(Rs,#s8)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, s8Imm:$src2),
"$dst = cmph.eq($src1, #$src2)",
@@ -5067,7 +2565,7 @@ def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
// r0=and(r0,#0xffff)
// p0=cmp.eq(r0,#0)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -5082,7 +2580,7 @@ def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
// r1=asl(r1,16)
// p0=cmp.eq(r0,r1)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -5096,19 +2594,20 @@ used in the cmph.gt instruction.
// Signed half compare(.gt) ri.
// Pd=cmph.gt(Rs,#s8)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+ (ins IntRegs:$src1, s8Ext:$src2),
"$dst = cmph.gt($src1, #$src2)",
[(set (i1 PredRegs:$dst),
(setgt (shl (i32 IntRegs:$src1), (i32 16)),
- s8ImmPred:$src2))]>,
+ s8ExtPred:$src2))]>,
Requires<[HasV4T]>;
*/
// Signed half compare(.gt) rr.
// Pd=cmph.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gt($src1, $src2)",
@@ -5119,24 +2618,41 @@ def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
// Unsigned half compare rr (.gtu).
// Pd=cmph.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "reg" in
def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst),
(setugt (and (i32 IntRegs:$src1), 65535),
(and (i32 IntRegs:$src2), 65535)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Unsigned half compare ri (.gtu).
// Pd=cmph.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "imm" in
def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmph.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
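+
+// As a rough sketch, !tstbit matches a masked single-bit test against zero
+// (names are hypothetical):
+//
+//   int bit_clear(unsigned x, unsigned b) {
+//     return (x & (1u << b)) == 0;   // expected to use: p0 = !tstbit(r0, r1)
+//   }
+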
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -5248,227 +2764,258 @@ let isReturn = 1, isTerminator = 1,
Requires<[HasV4T]>;
}
-
// Load/Store with absolute addressing mode
// memw(#u6)=Rt
-multiclass ST_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src"),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src",
[]>,
Requires<[HasV4T]>;
- def _abs_nv_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src.new"),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2.new",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src.new",
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
}
-let AddedComplexity = 30, isPredicable = 1 in
-def STrid_abs_V4 : STInst<(outs),
- (ins globaladdress:$absaddr, DoubleRegs:$src),
- "memd(##$absaddr) = $src",
- [(store (i64 DoubleRegs:$src),
- (HexagonCONST32 tglobaladdr:$absaddr))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+let addrMode = Absolute in {
+ defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
+ ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
+ ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
+ ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
-defm STrib : ST_abs<"memb">;
-defm STrih : ST_abs<"memh">;
-defm STriw : ST_abs<"memw">;
+ let isNVStorable = 0 in
+ defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
-let Predicates = [HasV4T], AddedComplexity = 30 in
+let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
(STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+def : Pat<(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+}
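+
+// As a rough sketch, with absolute addressing these patterns map direct
+// stores to globals (the global name is hypothetical):
+//
+//   long long g64;
+//   void set64(long long v) { g64 = v; }  // expected: memd(##g64) = r1:0
+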
-multiclass LD_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$absaddr),
- !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Multiclass for store instructions with GP-relative addressing mode.
+// mem[bhwd](#global)=Rt
+// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
+//===----------------------------------------------------------------------===//
+multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdress:$global, RC:$src),
+ mnemonic#"(#$global) = $src",
+ []>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$global, RC:$src),
+ mnemonic#"(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+let validSubTargets = HasV4SubT in {
+defm STd_GP : ST_GP<"memd", "STd_GP", DoubleRegs>,
+              ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel;
+defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
+              ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel;
+defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
+              ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel;
+defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
+              ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel;
}
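+
+// As a rough sketch, GP-relative stores address a global through GP (the
+// global name is hypothetical):
+//
+//   int counter;
+//   void set_counter(int v) { counter = v; }  // expected: memw(#counter) = r0
+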
-let AddedComplexity = 30 in
-def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins globaladdress:$absaddr),
- "$dst = memd(##$absaddr)",
- [(set (i64 DoubleRegs:$dst),
- (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
- Requires<[HasV4T]>;
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+}
-defm LDrib : LD_abs<"memb">;
-defm LDriub : LD_abs<"memub">;
-defm LDrih : LD_abs<"memh">;
-defm LDriuh : LD_abs<"memuh">;
-defm LDriw : LD_abs<"memw">;
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isExtended = 1, neverHasSideEffects = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 1, isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdressExt:$absaddr),
+ "$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+ let opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = Absolute in {
+ defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
@@ -5490,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+//===----------------------------------------------------------------------===//
+// Multiclass for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdress:$global),
+ "$dst = "#mnemonic#"(#$global)",
+ []>;
+
+ let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>;
+defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
+defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
+defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>;
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+
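+// As a rough sketch, the load maps above choose the memop width and extension
+// from the load type (the global names are hypothetical):
+//
+//   unsigned short us;
+//   signed char sc;
+//   int f(void) { return us + sc; }  // expected: r0 = memuh(#us), r1 = memb(#sc)
+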
// Transfer global address into a register
let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
@@ -5497,6 +3145,11 @@ def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
[(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
Requires<[HasV4T]>;
+// Transfer a block address into a register
+def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
+ (TFRI_V4 tblockaddress:$src1)>,
+ Requires<[HasV4T]>;
+
let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$src2),
@@ -5588,172 +3241,167 @@ defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
defm STriw_ind : ST_indirect_lo<"memw", store>;
// Store - absolute addressing mode: These instructions take a constant
-// value as the extended operand
+// value as the extended operand.
multiclass ST_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 0, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2"),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 1, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
+}
- def _abs_nv_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2.new"),
+let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
+validSubTargets = HasV4SubT in
+ def _abs_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2.new"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
+isNVStore = 1, validSubTargets = HasV4SubT in {
+ def _abs_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
}
+}
defm STrib_imm : ST_absimm<"memb">;
defm STrih_imm : ST_absimm<"memh">;
defm STriw_imm : ST_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
-
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+}
// Load - absolute addressing mode: These instructions take a constant
// value as the extended operand.
multiclass LD_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 1, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins u6Imm:$src),
+ (ins u0AlwaysExt:$src),
!strconcat("$dst = ",
- !strconcat(OpcStr, "(#$src)")),
+ !strconcat(OpcStr, "(##$src)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 2, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
}
+}
-defm LDrib_imm : LD_absimm<"memb">;
+defm LDrib_imm : LD_absimm<"memb">;
defm LDriub_imm : LD_absimm<"memub">;
-defm LDrih_imm : LD_absimm<"memh">;
+defm LDrih_imm : LD_absimm<"memh">;
defm LDriuh_imm : LD_absimm<"memuh">;
-defm LDriw_imm : LD_absimm<"memw">;
+defm LDriw_imm : LD_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(i32 (load u6ImmPred:$src)),
- (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(i32 (load u0AlwaysExtPred:$src)),
+ (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
- (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
+ (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
- (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
+ (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
- (LDrih_imm_abs_V4 u6ImmPred:$src)>;
-
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
- (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
+ (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
+ (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+}
// Indexed store double word - global address.
// memw(Rs+#u6:2)=#S8
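For illustration, the _abs load and store patterns above match memory accesses whose
address is a compile-time constant; u0AlwaysExtPred accepts such an address only when
the V4 constant extender is judged profitable. A minimal sketch of C++ source one of
these patterns can select, with an arbitrarily chosen address:

    // May select to "r0 = memw(##1048576)" via LDriw_imm_abs_V4 (V4 and later;
    // the address is hypothetical, chosen only for illustration).
    int *const port = reinterpret_cast<int *>(0x100000);
    int peek() { return *port; }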
@@ -5775,3 +3423,109 @@ def STrih_offset_ext_V4 : STInst<(outs),
[(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
(add IntRegs:$src1, u6_1ImmPred:$src2))]>,
Requires<[HasV4T]>;
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ FoldGlobalAddrGP:$addr),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
+ (i64 DoubleRegs:$src1)),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
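For illustration, the FoldGlobalAddrGP patterns above fold a global-plus-constant-offset
address directly into the absolute-addressing form instead of first materializing the
global in a register. A hedged sketch, with a hypothetical symbol name:

    // May select to "memw(##g+8) = r0" rather than "r1 = ##g; memw(r1+#8) = r0".
    extern int g[4];
    void set2(int v) { g[2] = v; }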
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
deleted file mode 100644
index 7a16c241ff8f..000000000000
--- a/lib/Target/Hexagon/HexagonMCInst.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotation.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
-
-#include "llvm/MC/MCInst.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
- class HexagonMCInst: public MCInst {
- // Packet start and end markers
- unsigned startPacket: 1, endPacket: 1;
- const MachineInstr *MachineI;
- public:
- explicit HexagonMCInst(): MCInst(),
- startPacket(0), endPacket(0) {}
-
- const MachineInstr* getMI() const { return MachineI; }
-
- void setMI(const MachineInstr *MI) { MachineI = MI; }
-
- bool isStartPacket() const { return (startPacket); }
- bool isEndPacket() const { return (endPacket); }
-
- void setStartPacket(bool yes) { startPacket = yes; }
- void setEndPacket(bool yes) { endPacket = yes; }
- };
-}
-
-#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 70bddcc76a59..f011d51bd61a 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,8 +15,9 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Constants.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
@@ -38,9 +39,10 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
+void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
+ MCI.setDesc(MI->getDesc());
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
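The include swap above reflects HexagonMCInst moving under MCTargetDesc/, and the new
setDesc() call shows the lowered instruction now carrying its MCInstrDesc so MC-layer
code can query instruction properties without a MachineInstr. A minimal sketch of what
the relocated class plausibly looks like; only setDesc() and the packet markers are
attested by this patch, the rest is an assumption:

    // Sketch only; the real header is MCTargetDesc/HexagonMCInst.h.
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrDesc.h"

    namespace llvm {
    class HexagonMCInst : public MCInst {
      const MCInstrDesc *Desc;               // filled in by HexagonLowerToMC()
      unsigned startPacket : 1, endPacket : 1;
    public:
      HexagonMCInst() : Desc(0), startPacket(0), endPacket(0) {}
      void setDesc(const MCInstrDesc &D) { Desc = &D; }
      const MCInstrDesc &getDesc() const { return *Desc; }
    };
    } // namespace llvm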
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 0e9ef4838d8a..1388ad4f167d 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -15,8 +15,8 @@
#define DEBUG_TYPE "misched"
#include "HexagonMachineScheduler.h"
-
-#include <queue>
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -153,7 +153,16 @@ void VLIWMachineScheduler::schedule() {
// Postprocess the DAG to add platform specific artificial dependencies.
postprocessDAG();
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
// To view Height/Depth correctly, they should be accessed at least once.
+ //
+ // FIXME: SUnit::dumpAll always recomputes depth and height now. The max
+ // depth/height could be computed directly from the roots and leaves.
DEBUG(unsigned maxH = 0;
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
if (SUnits[su].getHeight() > maxH)
@@ -167,7 +176,7 @@ void VLIWMachineScheduler::schedule() {
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
- initQueues();
+ initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -187,6 +196,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
DAG = static_cast<VLIWMachineScheduler*>(dag);
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+
Top.init(DAG, SchedModel);
Bot.init(DAG, SchedModel);
@@ -194,6 +204,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -678,4 +690,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Bot.bumpNode(SU);
}
}
-
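initialize() runs once per scheduling region, and CreateTargetMIHazardRecognizer
allocates a new recognizer on every call, so the added deletes stop repeated region
initialization from leaking the recognizers created for the previous region. A
self-contained analog of the pattern (names hypothetical):

    // Analog of the fix: initialize() can run many times on one object, so
    // release the old resource before allocating its replacement.
    struct Boundary { int *HazardRec; Boundary() : HazardRec(0) {} };
    void initialize(Boundary &B) {
      delete B.HazardRec;        // without this, each re-init leaks one object
      B.HazardRec = new int(0);  // stands in for CreateTargetMIHazardRecognizer
    }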
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index fe0242a0f74e..f68dadf29210 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -14,6 +14,9 @@
#ifndef HEXAGONASMPRINTER_H
#define HEXAGONASMPRINTER_H
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
@@ -22,14 +25,11 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 1e91c3948550..5e80e48b01d5 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -22,31 +22,29 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-nvj"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-
#include <map>
-
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
@@ -222,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
return false;
}
- unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+ unsigned cmpReg1, cmpOp2;
cmpReg1 = MI->getOperand(1).getReg();
if (secondReg) {
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
new file mode 100644
index 000000000000..c79d78f21080
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -0,0 +1,858 @@
+//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Immediate operands.
+
+let PrintMethod = "printImmOperand" in {
+ // f32Ext type is used to identify constant extended floating point immediates.
+ def f32Ext : Operand<f32>;
+ def s32Imm : Operand<i32>;
+ def s26_6Imm : Operand<i32>;
+ def s16Imm : Operand<i32>;
+ def s12Imm : Operand<i32>;
+ def s11Imm : Operand<i32>;
+ def s11_0Imm : Operand<i32>;
+ def s11_1Imm : Operand<i32>;
+ def s11_2Imm : Operand<i32>;
+ def s11_3Imm : Operand<i32>;
+ def s10Imm : Operand<i32>;
+ def s9Imm : Operand<i32>;
+ def m9Imm : Operand<i32>;
+ def s8Imm : Operand<i32>;
+ def s8Imm64 : Operand<i64>;
+ def s6Imm : Operand<i32>;
+ def s4Imm : Operand<i32>;
+ def s4_0Imm : Operand<i32>;
+ def s4_1Imm : Operand<i32>;
+ def s4_2Imm : Operand<i32>;
+ def s4_3Imm : Operand<i32>;
+ def u64Imm : Operand<i64>;
+ def u32Imm : Operand<i32>;
+ def u26_6Imm : Operand<i32>;
+ def u16Imm : Operand<i32>;
+ def u16_0Imm : Operand<i32>;
+ def u16_1Imm : Operand<i32>;
+ def u16_2Imm : Operand<i32>;
+ def u11_3Imm : Operand<i32>;
+ def u10Imm : Operand<i32>;
+ def u9Imm : Operand<i32>;
+ def u8Imm : Operand<i32>;
+ def u7Imm : Operand<i32>;
+ def u6Imm : Operand<i32>;
+ def u6_0Imm : Operand<i32>;
+ def u6_1Imm : Operand<i32>;
+ def u6_2Imm : Operand<i32>;
+ def u6_3Imm : Operand<i32>;
+ def u5Imm : Operand<i32>;
+ def u4Imm : Operand<i32>;
+ def u3Imm : Operand<i32>;
+ def u2Imm : Operand<i32>;
+ def u1Imm : Operand<i32>;
+ def n8Imm : Operand<i32>;
+ def m6Imm : Operand<i32>;
+}
+
+let PrintMethod = "printNOneImmOperand" in
+def nOneImm : Operand<i32>;
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s26_6ImmPred : PatLeaf<(i32 imm), [{
+ // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
+ // sign extended field and is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<26,6>(v);
+}]>;
+
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ // s11_0ImmPred predicate - True if the immediate fits in an 11-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field and is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
+ // sign extended field and is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
+ // sign extended field and is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+def m9ImmPred : PatLeaf<(i32 imm), [{
+ // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
+ // field. The range of m9 is -255 to 255.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v) && (v != -256);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // Adding "N ||" to suppress gcc unused warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+ // u26_6ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+ // field and is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<26,6>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate is a 16-bit unsigned
+ // value shifted left by 8 (a multiple of 256).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+ // u7StrictPosImmPred predicate - True if the immediate fits in a 7-bit
+ // unsigned field and is strictly greater than 0.
+ return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field that is 1 bit aligned - multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field that is 2 bits aligned - multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field that is 3 bits aligned - multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m5BImmPred : PatLeaf<(i32 imm), [{
+ // m5BImmPred predicate - True if the (char) number is in the range -31..-1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // This is specific to the zero extending of a negative by CombineInstr.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+ // m5HImmPred predicate - True if the (short) number is in the range -31..-1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // This is specific to the zero extending of a negative by CombineInstr.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+ // m5ImmPred predicate - True if the number is in the range -31..-1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+// InN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate is in the n8 range
+ // [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+ // Set5ImmPred predicate - True if the number is in the series of values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in setbit immediate.
+ uint32_t v = (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+ // Clr5ImmPred predicate - True if the number is in the series of
+ // bit negated values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in clrbit immediate.
+ // Note: we are bit NOTing the value.
+ uint32_t v = ~ (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ int32_t v = (int32_t)N->getSExtValue();
+ return (v >= 0 && v <= 31);
+}]>;
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+ // Set4ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit immediate.
+ uint16_t v = (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+ // Clr4ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit and clrbit immediate.
+ uint16_t v = ~ (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+ // Set3ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit immediate.
+ uint8_t v = (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+ // Clr3ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit and clrbit immediate.
+ uint8_t v = ~ (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (v >= 0 && v <= 7);
+}]>;
+
+
+// Extendable immediate operands.
+
+let PrintMethod = "printExtOperand" in {
+ def s16Ext : Operand<i32>;
+ def s12Ext : Operand<i32>;
+ def s10Ext : Operand<i32>;
+ def s9Ext : Operand<i32>;
+ def s8Ext : Operand<i32>;
+ def s6Ext : Operand<i32>;
+ def s11_0Ext : Operand<i32>;
+ def s11_1Ext : Operand<i32>;
+ def s11_2Ext : Operand<i32>;
+ def s11_3Ext : Operand<i32>;
+ def u6Ext : Operand<i32>;
+ def u7Ext : Operand<i32>;
+ def u8Ext : Operand<i32>;
+ def u9Ext : Operand<i32>;
+ def u10Ext : Operand<i32>;
+ def u6_0Ext : Operand<i32>;
+ def u6_1Ext : Operand<i32>;
+ def u6_2Ext : Operand<i32>;
+ def u6_3Ext : Operand<i32>;
+}
+
+let PrintMethod = "printImmOperand" in
+def u0AlwaysExt : Operand<i32>;
+
+// Predicates for constant extendable operands
+def s16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 16-bit sign extended field.
+ return isInt<16>(v);
+ else {
+ if (isInt<16>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field.
+ return isInt<10>(v);
+ else {
+ if (isInt<10>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field.
+ return isInt<9>(v);
+ else {
+ if (isInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in an 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s6_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6_10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 10-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<10>(v);
+ }
+}]>;
+
+def s11_0ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 11-bit sign extended field.
+ return isShiftedInt<11,0>(v);
+ else {
+ if (isInt<11>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s11_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 12-bit sign extended field and
+ // is 2 byte aligned.
+ return isShiftedInt<11,1>(v);
+ else {
+ if (isInt<12>(v))
+ return isShiftedInt<11,1>(v);
+
+ // Return true if extending this immediate is profitable and the low 1 bit
+ // is zero (2-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def s11_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 13-bit sign extended field and
+ // is 4-byte aligned.
+ return isShiftedInt<11,2>(v);
+ else {
+ if (isInt<13>(v))
+ return isShiftedInt<11,2>(v);
+
+ // Return true if extending this immediate is profitable and the low 2-bits
+ // are zero (4-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def s11_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 14-bit sign extended field and
+ // is 8-byte aligned.
+ return isShiftedInt<11,3>(v);
+ else {
+ if (isInt<14>(v))
+ return isShiftedInt<11,3>(v);
+
+ // Return true if extending this immediate is profitable and the low 3-bits
+ // are zero (8-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
+
+def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
+ // Predicate for an unsigned 32-bit value that always needs to be extended.
+ if (Subtarget.hasV4TOps()) {
+ if (isConstExtProfitable(Node)) {
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+ }
+ }
+ return false;
+}]>;
+
+def u6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit unsigned field.
+ return isUInt<6>(v);
+ else {
+ if (isUInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u7ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field.
+ return isUInt<7>(v);
+ else {
+ if (isUInt<7>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 8-bit unsigned field.
+ return isUInt<8>(v);
+ else {
+ if (isUInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field.
+ return isUInt<9>(v);
+ else {
+ if (isUInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u6_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field and
+ // is 2-byte aligned.
+ return isShiftedUInt<6,1>(v);
+ else {
+ if (isUInt<7>(v))
+ return isShiftedUInt<6,1>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def u6_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 8-bit unsigned field and
+ // is 4-byte aligned.
+ return isShiftedUInt<6,2>(v);
+ else {
+ if (isUInt<8>(v))
+ return isShiftedUInt<6,2>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def u6_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field and
+ // is 8-byte aligned.
+ return isShiftedUInt<6,3>(v);
+ else {
+ if (isUInt<9>(v))
+ return isShiftedUInt<6,3>(v);
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
+
+// Addressing modes.
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
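The shifted-immediate predicates in this file all lean on isShiftedInt and
isShiftedUInt from llvm/Support/MathExtras.h: isShiftedInt<N,S>(v) holds when v is a
multiple of 2^S and v >> S fits in an N-bit signed field, with isShiftedUInt as the
unsigned analogue. A small check of that behavior, compiled against LLVM's headers,
with arbitrarily chosen values:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    int main() {
      using namespace llvm;
      assert(isShiftedInt<11, 2>(4092));   // 1023 * 4: multiple of 4, fits s11
      assert(!isShiftedInt<11, 2>(4094));  // not a multiple of 4
      assert(!isShiftedInt<11, 2>(4096));  // 1024 overflows an 11-bit signed field
      assert(isShiftedUInt<6, 1>(126));    // 63 * 2: multiple of 2, fits u6
      return 0;
    }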
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index a295015de561..576f1d7d0790 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -38,21 +38,21 @@
#define DEBUG_TYPE "hexagon-peephole"
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 37424860564f..d8b4e2fcb368 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -17,8 +17,6 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,7 +24,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -117,37 +118,15 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
"architecture version");
}
-void HexagonRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
-
- if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
- // Hexagon_TODO: add code
- } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
- // Hexagon_TODO: add code
- } else {
- llvm_unreachable("Cannot handle this call frame pseudo instruction");
- }
- MBB.erase(I);
-}
-
void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
-
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
//
// Hexagon_TODO: Do we need to enforce this for Hexagon?
assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Addressable stack objects are accessed using neg. offsets from %fp.
MachineFunction &MF = *MI.getParent()->getParent();
@@ -167,8 +146,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
!TII.isSpillPredRegOp(&MI)) {
// Replace frame index with a stack pointer reference.
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
+ false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
} else {
// Replace frame index with a frame pointer reference.
if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
@@ -205,8 +185,8 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
dstReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
(MI.getOpcode() == Hexagon::STriw) ||
(MI.getOpcode() == Hexagon::STrid) ||
@@ -233,29 +213,44 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_ri),
resReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if (TII.isMemOp(&MI)) {
- unsigned resReg = HEXAGON_RESERVED_REG_1;
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
- true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
- } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
- resReg).addReg(FrameReg).addReg(resReg);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ // Use the constant extender if the instruction provides it and we are
+ // compiling for V4 or later.
+ if (Subtarget.hasV4TOps()) {
+ if (TII.isConstExtended(&MI)) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ TII.immediateExtend(&MI);
+ } else {
+ llvm_unreachable("Need to implement for memops");
+ }
} else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- resReg).addReg(FrameReg).addImm(Offset);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ // Only V3 and older instructions here.
+ unsigned ResReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+ false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ addReg(ResReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+ addImm(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ }
}
} else {
unsigned dstReg = MI.getOperand(0).getReg();
@@ -265,14 +260,14 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_rr),
dstReg).addReg(FrameReg).addReg(dstReg);
// Can we delete MI??? r2 = add (r2, #0).
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
}
} else {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
}
}
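The new FIOperandNum parameter tracks a target-independent interface change: the
generic frame-index elimination pass now locates the frame-index operand itself and
hands its position to the target hook, which is why the per-target scan loop deleted
above became redundant. Roughly how a caller drives the updated hook (simplified; not
the exact PrologEpilogInserter code):

    // Simplified caller-side sketch of the updated interface.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (!MI->getOperand(i).isFI())
        continue;
      TRI.eliminateFrameIndex(MI, /*SPAdj=*/0, /*FIOperandNum=*/i, RS);
      break;
    }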
@@ -310,58 +305,6 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
Moves.push_back(MachineMove(0, Dst, Src));
}
-// Get the weight in units of pressure for this register class.
-const RegClassWeight &
-HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
- // Each TargetRegisterClass has a per register weight, and weight
- // limit which must be less than the limits of its pressure sets.
- static const RegClassWeight RCWeightTable[] = {
- {1, 32}, // IntRegs
- {1, 8}, // CRRegs
- {1, 4}, // PredRegs
- {2, 16}, // DoubleRegs
- {0, 0} };
- return RCWeightTable[RC->getID()];
-}
-
-/// Get the number of dimensions of register pressure.
-unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
- return 4;
-}
-
-/// Get the name of this register unit pressure set.
-const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
- static const char *const RegPressureSetName[] = {
- "IntRegsRegSet",
- "CRRegsRegSet",
- "PredRegsRegSet",
- "DoubleRegsRegSet"
- };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureSetName[Idx];
-}
-
-/// Get the register unit pressure limit for this dimension.
-/// This limit must be adjusted dynamically for reserved registers.
-unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
- static const int RegPressureLimit [] = { 16, 4, 2, 8 };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureLimit[Idx];
-}
-
-const int*
-HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
- const {
- static const int RCSetsTable[] = {
- 0, -1, // IntRegs
- 1, -1, // CRRegs
- 2, -1, // PredRegs
- 0, -1, // DoubleRegs
- -1 };
- static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
- unsigned SetListStart = RCSetStartTable[RC->getID()];
- return &RCSetsTable[SetListStart];
-}
unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 8820d13e0122..8a3f94a3fd12 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef HexagonREGISTERINFO_H
#define HexagonREGISTERINFO_H
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#include "HexagonGenRegisterInfo.inc"
@@ -56,12 +56,9 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
@@ -87,11 +84,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
- unsigned getNumRegPressureSets() const;
- const char *getRegPressureSetName(unsigned Idx) const;
- unsigned getRegPressureSetLimit(unsigned Idx) const;
- const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 4d93dd18d4e0..34bf4eacfdc0 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -50,7 +51,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
unsigned Idx = 1;
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
++AI, ++Idx) {
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
Argument* Arg = AI;
if (!isa<PointerType>(Arg->getType())) {
for (Instruction::use_iterator UI = Arg->use_begin();
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index b5ff69a701cd..c2cfbb9710a6 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -8,10 +8,11 @@
//===----------------------------------------------------------------------===//
// Functional Units
-def LUNIT : FuncUnit;
-def LSUNIT : FuncUnit;
-def MUNIT : FuncUnit;
-def SUNIT : FuncUnit;
+def LSUNIT : FuncUnit; // SLOT0
+def LUNIT : FuncUnit; // SLOT1
+def MUNIT : FuncUnit; // SLOT2
+def SUNIT : FuncUnit; // SLOT3
+def LOOPUNIT : FuncUnit;
// Itinerary classes
def ALU32 : InstrItinClass;
@@ -20,27 +21,34 @@ def CR : InstrItinClass;
def J : InstrItinClass;
def JR : InstrItinClass;
def LD : InstrItinClass;
+def LD0 : InstrItinClass;
def M : InstrItinClass;
def ST : InstrItinClass;
+def ST0 : InstrItinClass;
def S : InstrItinClass;
def SYS : InstrItinClass;
-def MARKER : InstrItinClass;
+def ENDLOOP : InstrItinClass;
def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
def HexagonItineraries :
- ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [
InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>,
+ InstrStage<1, [MUNIT, SUNIT]>]>
]>;
def HexagonModel : SchedMachineModel {
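
For readers unfamiliar with TableGen itineraries: each InstrItinData row maps an itinerary class (LD, ST0, ENDLOOP, ...) to the functional units its stage may issue on, and the two-stage PSEUDOM entry reserves a unit from each stage in the same cycle (the trailing 0 means the second stage starts with no cycle advance). Consumers read the table back through InstrItineraryData; a sketch of that query, mirroring the getUnits helper this same patch adds to HexagonMCInst:

    #include "llvm/MC/MCInstrItineraries.h"

    // Return the functional-unit mask of an instruction's first stage,
    // e.g. LUNIT|LSUNIT for class LD in the itinerary above.
    static unsigned firstStageUnits(const llvm::InstrItineraryData &II,
                                    unsigned SchedClass) {
      const llvm::InstrStage *IS = II.beginStage(SchedClass);
      return IS->getUnits();
    }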
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 5668ae81e82e..ef72cf4068bf 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -28,6 +28,10 @@ def SLOT0 : FuncUnit;
def SLOT1 : FuncUnit;
def SLOT2 : FuncUnit;
def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
def NV_V4 : InstrItinClass;
@@ -36,22 +40,26 @@ def MEM_V4 : InstrItinClass;
def PREFIX : InstrItinClass;
def HexagonItinerariesV4 :
- ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
]>;
def HexagonModelV4 : SchedMachineModel {
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index a81cd913a6ec..814249fa6832 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -27,24 +27,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "xfer"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 4bacb8fa670d..07d5ce1d8ab0 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden,
static cl::opt<bool>
EnableMemOps(
"enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
- cl::desc("Generate V4 memop instructions."));
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc(
+ "Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+DisableMemOps(
+ "disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc(
+ "Do not generate V4 MEMOP in code generation for Hexagon target"));
static cl::opt<bool>
EnableIEEERndNear(
@@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if (EnableMemOps)
+ // UseMemOps is on by default unless explicitly disabled.
+ if (DisableMemOps)
+ UseMemOps = false;
+ else if (EnableMemOps)
UseMemOps = true;
else
UseMemOps = false;
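
The paired enable/disable flags implement "on by default, disable wins" without changing the meaning of existing -enable-hexagon-memops command lines. The pattern in isolation (the flag names here are hypothetical):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> EnableFoo(
        "enable-foo", llvm::cl::init(true),
        llvm::cl::desc("Enable foo (on by default)"));
    static llvm::cl::opt<bool> DisableFoo(
        "disable-foo", llvm::cl::init(false),
        llvm::cl::desc("Force foo off"));

    static bool useFoo() {
      if (DisableFoo)     // an explicit disable always wins
        return false;
      return EnableFoo;   // otherwise honor the default-true enable flag
    }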
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 5d9d6d890d98..76a8fba195f3 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -14,8 +14,8 @@
#ifndef Hexagon_SUBTARGET_H
#define Hexagon_SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 30866e9eeba8..ce45c626f799 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -15,13 +15,13 @@
#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonMachineScheduler.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -74,21 +78,21 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
- setMCUseCFI(false);
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
}
// addPassesForOptimizations - Allow the backend (target) to add Target
// Independent Optimization passes to the Pass Manager.
bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
-
- PM.add(createConstantPropagationPass());
- PM.add(createLoopSimplifyPass());
- PM.add(createDeadCodeEliminationPass());
- PM.add(createConstantPropagationPass());
- PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass());
+ }
return true;
}
@@ -122,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
- addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
- addPass(createHexagonISelDag(getHexagonTargetMachine()));
- addPass(createHexagonPeephole());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
+ addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPeephole());
+
return false;
}
bool HexagonPassConfig::addPreRegAlloc() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonHardwareLoops());
- }
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
return true;
}
bool HexagonPassConfig::addPreSched2() {
- addPass(&IfConverterID);
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
return true;
}
bool HexagonPassConfig::addPreEmitPass() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonFixupHwLoops());
- }
- addPass(createHexagonNewValueJump());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonNewValueJump());
// Expand Spill code for predicate registers.
addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
@@ -162,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
// Create Packets.
- addPass(createHexagonPacketizer());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPacketizer());
return false;
}
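
Nearly every hunk in this file applies the same guard: optimization-only passes are skipped at -O0 so that fast, unoptimized builds stay fast and debuggable. As a standalone sketch — ExamplePassConfig and createExampleOptPass are placeholders for a TargetPassConfig subclass and any optimization pass, not real LLVM names:

    #include "llvm/CodeGen/Passes.h"

    bool ExamplePassConfig::addPreRegAlloc() {
      // Skip the pass wholesale when compiling at -O0.
      if (getOptLevel() != llvm::CodeGenOpt::None)
        addPass(createExampleOptPass());   // hypothetical pass constructor
      return false;
    }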
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 7a4215c119a9..cf8f9aa3612f 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,14 +14,13 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "HexagonInstrInfo.h"
-#include "HexagonSubtarget.h"
+#include "HexagonFrameLowering.h"
#include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
#include "HexagonSelectionDAGInfo.h"
-#include "HexagonFrameLowering.h"
+#include "HexagonSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -71,14 +68,6 @@ public:
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
-
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4d7761ac358..993fcfaed43e 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -14,13 +14,13 @@
#include "HexagonTargetObjectFile.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 3d5f685028ea..c0d86da1c05e 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -149,7 +149,6 @@ namespace {
bool canReserveResourcesForConstExt(MachineInstr *MI);
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
- bool isDotNewInst(MachineInstr* MI);
};
}
@@ -242,8 +241,9 @@ static bool IsIndirectCall(MachineInstr* MI) {
// reservation fail.
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -257,10 +257,10 @@ void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- assert(QII->isExtended(MI) &&
+ assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
MF->DeleteMachineInstr(PseudoMI);
@@ -271,8 +271,9 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
// true, otherwise, return false.
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -349,17 +350,6 @@ static bool IsControlFlow(MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
-bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (QII->isNewValueJump(MI))
- return true;
-
- if (QII->isNewValueStore(MI))
- return true;
-
- return false;
-}
-
// Function returns true if an instruction can be promoted to the new-value
// store. It will always return false for v2 and v3.
// It lists all the conditional and unconditional stores that can be promoted
@@ -375,7 +365,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STrib_indexed:
case Hexagon::STrib_indexed_shl_V4:
case Hexagon::STrib_shl_V4:
- case Hexagon::STrib_GP_V4:
case Hexagon::STb_GP_V4:
case Hexagon::POST_STbri:
case Hexagon::STrib_cPt:
@@ -398,17 +387,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STb_GP_cNotPt_V4:
case Hexagon::STb_GP_cdnPt_V4:
case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cPt_V4:
- case Hexagon::STrib_GP_cNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
// store halfword
case Hexagon::STrih:
case Hexagon::STrih_indexed:
case Hexagon::STrih_indexed_shl_V4:
case Hexagon::STrih_shl_V4:
- case Hexagon::STrih_GP_V4:
case Hexagon::STh_GP_V4:
case Hexagon::POST_SThri:
case Hexagon::STrih_cPt:
@@ -431,17 +415,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STh_GP_cNotPt_V4:
case Hexagon::STh_GP_cdnPt_V4:
case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cPt_V4:
- case Hexagon::STrih_GP_cNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
// store word
case Hexagon::STriw:
case Hexagon::STriw_indexed:
case Hexagon::STriw_indexed_shl_V4:
case Hexagon::STriw_shl_V4:
- case Hexagon::STriw_GP_V4:
case Hexagon::STw_GP_V4:
case Hexagon::POST_STwri:
case Hexagon::STriw_cPt:
@@ -464,10 +443,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STw_GP_cNotPt_V4:
case Hexagon::STw_GP_cdnPt_V4:
case Hexagon::STw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cPt_V4:
- case Hexagon::STriw_GP_cNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
return QRI->Subtarget.hasV4TOps();
}
return false;
@@ -507,9 +482,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrib_shl_V4:
return Hexagon::STrib_shl_nv_V4;
- case Hexagon::STrib_GP_V4:
- return Hexagon::STrib_GP_nv_V4;
-
case Hexagon::STb_GP_V4:
return Hexagon::STb_GP_nv_V4;
@@ -576,18 +548,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STb_GP_cdnNotPt_V4:
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cNotPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnPt_V4:
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_V4:
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// store new value halfword
case Hexagon::STrih:
return Hexagon::STrih_nv_V4;
@@ -601,9 +561,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrih_shl_V4:
return Hexagon::STrih_shl_nv_V4;
- case Hexagon::STrih_GP_V4:
- return Hexagon::STrih_GP_nv_V4;
-
case Hexagon::STh_GP_V4:
return Hexagon::STh_GP_nv_V4;
@@ -670,18 +627,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STh_GP_cdnNotPt_V4:
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cNotPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnPt_V4:
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_V4:
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// store new value word
case Hexagon::STriw:
return Hexagon::STriw_nv_V4;
@@ -695,9 +640,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STriw_shl_V4:
return Hexagon::STriw_shl_nv_V4;
- case Hexagon::STriw_GP_V4:
- return Hexagon::STriw_GP_nv_V4;
-
case Hexagon::STw_GP_V4:
return Hexagon::STw_GP_nv_V4;
@@ -764,17 +706,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STw_GP_cdnNotPt_V4:
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cNotPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnPt_V4:
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_V4:
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
}
}
@@ -820,12 +751,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_V4 :
return Hexagon::STb_GP_cdnNotPt_V4;
- case Hexagon::STrib_GP_cPt_V4 :
- return Hexagon::STrib_GP_cdnPt_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4 :
- return Hexagon::STrib_GP_cdnNotPt_V4;
-
// Store doubleword conditionally
case Hexagon::STrid_cPt :
return Hexagon::STrid_cdnPt_V4;
@@ -857,12 +782,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STd_GP_cNotPt_V4 :
return Hexagon::STd_GP_cdnNotPt_V4;
- case Hexagon::STrid_GP_cPt_V4 :
- return Hexagon::STrid_GP_cdnPt_V4;
-
- case Hexagon::STrid_GP_cNotPt_V4 :
- return Hexagon::STrid_GP_cdnNotPt_V4;
-
// Store halfword conditionally
case Hexagon::STrih_cPt :
return Hexagon::STrih_cdnPt_V4;
@@ -900,12 +819,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_V4 :
return Hexagon::STh_GP_cdnNotPt_V4;
- case Hexagon::STrih_GP_cPt_V4 :
- return Hexagon::STrih_GP_cdnPt_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4 :
- return Hexagon::STrih_GP_cdnNotPt_V4;
-
// Store word conditionally
case Hexagon::STriw_cPt :
return Hexagon::STriw_cdnPt_V4;
@@ -943,12 +856,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_V4 :
return Hexagon::STw_GP_cdnNotPt_V4;
- case Hexagon::STriw_GP_cPt_V4 :
- return Hexagon::STriw_GP_cdnPt_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4 :
- return Hexagon::STriw_GP_cdnNotPt_V4;
-
// Conditional Jumps
case Hexagon::JMP_c:
return Hexagon::JMP_cdnPt;
@@ -1091,72 +998,36 @@ static int GetDotNewPredOp(const int opc) {
// V4 indexed+scaled load
- case Hexagon::LDrid_indexed_cPt_V4 :
- return Hexagon::LDrid_indexed_cdnPt_V4;
-
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- return Hexagon::LDrid_indexed_cdnNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4 :
- return Hexagon::LDrib_indexed_cdnPt_V4;
-
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- return Hexagon::LDrib_indexed_cdnNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriub_indexed_cPt_V4 :
- return Hexagon::LDriub_indexed_cdnPt_V4;
-
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- return Hexagon::LDriub_indexed_cdnNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnPt_V4;
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4 :
- return Hexagon::LDrih_indexed_cdnPt_V4;
-
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- return Hexagon::LDrih_indexed_cdnNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriuh_indexed_cPt_V4 :
- return Hexagon::LDriuh_indexed_cdnPt_V4;
-
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- return Hexagon::LDriuh_indexed_cdnNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4 :
- return Hexagon::LDriw_indexed_cdnPt_V4;
-
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- return Hexagon::LDriw_indexed_cdnNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4 :
return Hexagon::LDriw_indexed_shl_cdnPt_V4;
@@ -1201,42 +1072,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::LDw_GP_cNotPt_V4:
return Hexagon::LDw_GP_cdnNotPt_V4;
- case Hexagon::LDrid_GP_cPt_V4:
- return Hexagon::LDrid_GP_cdnPt_V4;
-
- case Hexagon::LDrid_GP_cNotPt_V4:
- return Hexagon::LDrid_GP_cdnNotPt_V4;
-
- case Hexagon::LDrib_GP_cPt_V4:
- return Hexagon::LDrib_GP_cdnPt_V4;
-
- case Hexagon::LDrib_GP_cNotPt_V4:
- return Hexagon::LDrib_GP_cdnNotPt_V4;
-
- case Hexagon::LDriub_GP_cPt_V4:
- return Hexagon::LDriub_GP_cdnPt_V4;
-
- case Hexagon::LDriub_GP_cNotPt_V4:
- return Hexagon::LDriub_GP_cdnNotPt_V4;
-
- case Hexagon::LDrih_GP_cPt_V4:
- return Hexagon::LDrih_GP_cdnPt_V4;
-
- case Hexagon::LDrih_GP_cNotPt_V4:
- return Hexagon::LDrih_GP_cdnNotPt_V4;
-
- case Hexagon::LDriuh_GP_cPt_V4:
- return Hexagon::LDriuh_GP_cdnPt_V4;
-
- case Hexagon::LDriuh_GP_cNotPt_V4:
- return Hexagon::LDriuh_GP_cdnNotPt_V4;
-
- case Hexagon::LDriw_GP_cPt_V4:
- return Hexagon::LDriw_GP_cdnPt_V4;
-
- case Hexagon::LDriw_GP_cNotPt_V4:
- return Hexagon::LDriw_GP_cdnNotPt_V4;
-
// Conditional store new-value byte
case Hexagon::STrib_cPt_nv_V4 :
return Hexagon::STrib_cdnPt_nv_V4;
@@ -1264,12 +1099,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4 :
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_nv_V4 :
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value halfword
case Hexagon::STrih_cPt_nv_V4 :
return Hexagon::STrih_cdnPt_nv_V4;
@@ -1297,12 +1126,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4 :
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_nv_V4 :
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value word
case Hexagon::STriw_cPt_nv_V4 :
return Hexagon::STriw_cdnPt_nv_V4;
@@ -1330,12 +1153,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4 :
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_nv_V4 :
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
-
// Conditional add
case Hexagon::ADD_ri_cPt :
return Hexagon::ADD_ri_cdnPt;
@@ -1622,72 +1439,36 @@ static int GetDotOldOp(const int opc) {
// V4 indexed+scaled Load
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- return Hexagon::LDrid_indexed_cPt_V4;
-
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
- return Hexagon::LDrid_indexed_cNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
return Hexagon::LDrid_indexed_shl_cPt_V4;
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
- return Hexagon::LDrib_indexed_cNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
return Hexagon::LDrib_indexed_shl_cPt_V4;
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- return Hexagon::LDriub_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
- return Hexagon::LDriub_indexed_cNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
return Hexagon::LDriub_indexed_shl_cPt_V4;
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
- return Hexagon::LDrih_indexed_cNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
return Hexagon::LDrih_indexed_shl_cPt_V4;
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- return Hexagon::LDriuh_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
- return Hexagon::LDriuh_indexed_cNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
return Hexagon::LDriuh_indexed_shl_cPt_V4;
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- return Hexagon::LDriw_indexed_cPt_V4;
-
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
- return Hexagon::LDriw_indexed_cNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
return Hexagon::LDriw_indexed_shl_cPt_V4;
@@ -1732,42 +1513,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::LDw_GP_cdnNotPt_V4:
return Hexagon::LDw_GP_cNotPt_V4;
- case Hexagon::LDrid_GP_cdnPt_V4:
- return Hexagon::LDrid_GP_cPt_V4;
-
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- return Hexagon::LDrid_GP_cNotPt_V4;
-
- case Hexagon::LDrib_GP_cdnPt_V4:
- return Hexagon::LDrib_GP_cPt_V4;
-
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- return Hexagon::LDrib_GP_cNotPt_V4;
-
- case Hexagon::LDriub_GP_cdnPt_V4:
- return Hexagon::LDriub_GP_cPt_V4;
-
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- return Hexagon::LDriub_GP_cNotPt_V4;
-
- case Hexagon::LDrih_GP_cdnPt_V4:
- return Hexagon::LDrih_GP_cPt_V4;
-
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- return Hexagon::LDrih_GP_cNotPt_V4;
-
- case Hexagon::LDriuh_GP_cdnPt_V4:
- return Hexagon::LDriuh_GP_cPt_V4;
-
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- return Hexagon::LDriuh_GP_cNotPt_V4;
-
- case Hexagon::LDriw_GP_cdnPt_V4:
- return Hexagon::LDriw_GP_cPt_V4;
-
- case Hexagon::LDriw_GP_cdnNotPt_V4:
- return Hexagon::LDriw_GP_cNotPt_V4;
-
// Conditional add
case Hexagon::ADD_ri_cdnPt :
@@ -1901,16 +1646,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4:
return Hexagon::STb_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
-
// Store new-value byte - unconditional
case Hexagon::STrib_nv_V4:
return Hexagon::STrib;
@@ -1924,9 +1659,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrib_shl_nv_V4:
return Hexagon::STrib_shl_V4;
- case Hexagon::STrib_GP_nv_V4:
- return Hexagon::STrib_GP_V4;
-
case Hexagon::STb_GP_nv_V4:
return Hexagon::STb_GP_V4;
@@ -1990,16 +1722,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4:
return Hexagon::STh_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
-
// Store new-value halfword - unconditional
case Hexagon::STrih_nv_V4:
@@ -2014,9 +1736,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrih_shl_nv_V4:
return Hexagon::STrih_shl_V4;
- case Hexagon::STrih_GP_nv_V4:
- return Hexagon::STrih_GP_V4;
-
case Hexagon::STh_GP_nv_V4:
return Hexagon::STh_GP_V4;
@@ -2081,16 +1800,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4:
return Hexagon::STw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
-
// Store new-value word - unconditional
case Hexagon::STriw_nv_V4:
@@ -2105,9 +1814,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STriw_shl_nv_V4:
return Hexagon::STriw_shl_V4;
- case Hexagon::STriw_GP_nv_V4:
- return Hexagon::STriw_GP_V4;
-
case Hexagon::STw_GP_nv_V4:
return Hexagon::STw_GP_V4;
@@ -2146,11 +1852,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STd_GP_cdnNotPt_V4 :
return Hexagon::STd_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cdnPt_V4 :
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- return Hexagon::STrid_GP_cNotPt_V4;
}
}
@@ -2248,28 +1949,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnPt :
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cdnPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
case Hexagon::ADD_ri_cPt :
@@ -2298,42 +1987,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTB_cdnPt_V4 :
case Hexagon::ZXTH_cPt_V4 :
case Hexagon::ZXTH_cdnPt_V4 :
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
case Hexagon::LDd_GP_cPt_V4 :
case Hexagon::LDb_GP_cPt_V4 :
case Hexagon::LDub_GP_cPt_V4 :
case Hexagon::LDh_GP_cPt_V4 :
case Hexagon::LDuh_GP_cPt_V4 :
case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
case Hexagon::STh_GP_cPt_V4 :
case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
case Hexagon::LDd_GP_cdnPt_V4 :
case Hexagon::LDb_GP_cdnPt_V4 :
case Hexagon::LDub_GP_cdnPt_V4 :
case Hexagon::LDh_GP_cdnPt_V4 :
case Hexagon::LDuh_GP_cdnPt_V4 :
case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
case Hexagon::STd_GP_cdnPt_V4 :
case Hexagon::STb_GP_cdnPt_V4 :
case Hexagon::STh_GP_cdnPt_V4 :
@@ -2419,28 +2088,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnNotPt :
case Hexagon::POST_LDriub_cNotPt :
case Hexagon::POST_LDriub_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
case Hexagon::ADD_ri_cNotPt :
@@ -2470,42 +2127,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTH_cNotPt_V4 :
case Hexagon::ZXTH_cdnNotPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
case Hexagon::LDd_GP_cNotPt_V4 :
case Hexagon::LDb_GP_cNotPt_V4 :
case Hexagon::LDub_GP_cNotPt_V4 :
case Hexagon::LDh_GP_cNotPt_V4 :
case Hexagon::LDuh_GP_cNotPt_V4 :
case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cNotPt_V4 :
case Hexagon::STh_GP_cNotPt_V4 :
case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
case Hexagon::LDd_GP_cdnNotPt_V4 :
case Hexagon::LDb_GP_cdnNotPt_V4 :
case Hexagon::LDub_GP_cdnNotPt_V4 :
case Hexagon::LDh_GP_cdnNotPt_V4 :
case Hexagon::LDuh_GP_cdnNotPt_V4 :
case Hexagon::LDw_GP_cdnNotPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
case Hexagon::STd_GP_cdnNotPt_V4 :
case Hexagon::STb_GP_cdnNotPt_V4 :
case Hexagon::STh_GP_cdnNotPt_V4 :
@@ -2516,203 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI,
return false;
}
-bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
- if (isNewValueInst(MI))
- return true;
-
- switch (MI->getOpcode()) {
- case Hexagon::TFR_cdnNotPt:
- case Hexagon::TFR_cdnPt:
- case Hexagon::TFRI_cdnNotPt:
- case Hexagon::TFRI_cdnPt:
- case Hexagon::LDrid_cdnPt :
- case Hexagon::LDrid_cdnNotPt :
- case Hexagon::LDrid_indexed_cdnPt :
- case Hexagon::LDrid_indexed_cdnNotPt :
- case Hexagon::POST_LDrid_cdnPt_V4 :
- case Hexagon::POST_LDrid_cdnNotPt_V4 :
- case Hexagon::LDriw_cdnPt :
- case Hexagon::LDriw_cdnNotPt :
- case Hexagon::LDriw_indexed_cdnPt :
- case Hexagon::LDriw_indexed_cdnNotPt :
- case Hexagon::POST_LDriw_cdnPt_V4 :
- case Hexagon::POST_LDriw_cdnNotPt_V4 :
- case Hexagon::LDrih_cdnPt :
- case Hexagon::LDrih_cdnNotPt :
- case Hexagon::LDrih_indexed_cdnPt :
- case Hexagon::LDrih_indexed_cdnNotPt :
- case Hexagon::POST_LDrih_cdnPt_V4 :
- case Hexagon::POST_LDrih_cdnNotPt_V4 :
- case Hexagon::LDrib_cdnPt :
- case Hexagon::LDrib_cdnNotPt :
- case Hexagon::LDrib_indexed_cdnPt :
- case Hexagon::LDrib_indexed_cdnNotPt :
- case Hexagon::POST_LDrib_cdnPt_V4 :
- case Hexagon::POST_LDrib_cdnNotPt_V4 :
- case Hexagon::LDriuh_cdnPt :
- case Hexagon::LDriuh_cdnNotPt :
- case Hexagon::LDriuh_indexed_cdnPt :
- case Hexagon::LDriuh_indexed_cdnNotPt :
- case Hexagon::POST_LDriuh_cdnPt_V4 :
- case Hexagon::POST_LDriuh_cdnNotPt_V4 :
- case Hexagon::LDriub_cdnPt :
- case Hexagon::LDriub_cdnNotPt :
- case Hexagon::LDriub_indexed_cdnPt :
- case Hexagon::LDriub_indexed_cdnNotPt :
- case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::POST_LDriub_cdnNotPt_V4 :
-
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
-
-// Coditional add
- case Hexagon::ADD_ri_cdnPt:
- case Hexagon::ADD_ri_cdnNotPt:
- case Hexagon::ADD_rr_cdnPt:
- case Hexagon::ADD_rr_cdnNotPt:
-
- // Conditional logical operations
- case Hexagon::XOR_rr_cdnPt :
- case Hexagon::XOR_rr_cdnNotPt :
- case Hexagon::AND_rr_cdnPt :
- case Hexagon::AND_rr_cdnNotPt :
- case Hexagon::OR_rr_cdnPt :
- case Hexagon::OR_rr_cdnNotPt :
-
- // Conditonal subtract
- case Hexagon::SUB_rr_cdnPt :
- case Hexagon::SUB_rr_cdnNotPt :
-
- // Conditional combine
- case Hexagon::COMBINE_rr_cdnPt :
- case Hexagon::COMBINE_rr_cdnNotPt :
-
- // Conditional shift operations
- case Hexagon::ASLH_cdnPt_V4:
- case Hexagon::ASLH_cdnNotPt_V4:
- case Hexagon::ASRH_cdnPt_V4:
- case Hexagon::ASRH_cdnNotPt_V4:
- case Hexagon::SXTB_cdnPt_V4:
- case Hexagon::SXTB_cdnNotPt_V4:
- case Hexagon::SXTH_cdnPt_V4:
- case Hexagon::SXTH_cdnNotPt_V4:
- case Hexagon::ZXTB_cdnPt_V4:
- case Hexagon::ZXTB_cdnNotPt_V4:
- case Hexagon::ZXTH_cdnPt_V4:
- case Hexagon::ZXTH_cdnNotPt_V4:
-
- // Conditional stores
- case Hexagon::STrib_imm_cdnPt_V4 :
- case Hexagon::STrib_imm_cdnNotPt_V4 :
- case Hexagon::STrib_cdnPt_V4 :
- case Hexagon::STrib_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_cdnPt_V4 :
- case Hexagon::STrib_indexed_cdnNotPt_V4 :
- case Hexagon::POST_STbri_cdnPt_V4 :
- case Hexagon::POST_STbri_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
-
- // Store doubleword conditionally
- case Hexagon::STrid_indexed_cdnPt_V4 :
- case Hexagon::STrid_indexed_cdnNotPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STdri_cdnPt_V4 :
- case Hexagon::POST_STdri_cdnNotPt_V4 :
-
- // Store halfword conditionally
- case Hexagon::STrih_cdnPt_V4 :
- case Hexagon::STrih_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_cdnPt_V4 :
- case Hexagon::STrih_indexed_cdnNotPt_V4 :
- case Hexagon::STrih_imm_cdnPt_V4 :
- case Hexagon::STrih_imm_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_SThri_cdnPt_V4 :
- case Hexagon::POST_SThri_cdnNotPt_V4 :
-
- // Store word conditionally
- case Hexagon::STriw_cdnPt_V4 :
- case Hexagon::STriw_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_cdnPt_V4 :
- case Hexagon::STriw_indexed_cdnNotPt_V4 :
- case Hexagon::STriw_imm_cdnPt_V4 :
- case Hexagon::STriw_imm_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STwri_cdnPt_V4 :
- case Hexagon::POST_STwri_cdnNotPt_V4 :
-
- case Hexagon::LDd_GP_cdnPt_V4:
- case Hexagon::LDd_GP_cdnNotPt_V4:
- case Hexagon::LDb_GP_cdnPt_V4:
- case Hexagon::LDb_GP_cdnNotPt_V4:
- case Hexagon::LDub_GP_cdnPt_V4:
- case Hexagon::LDub_GP_cdnNotPt_V4:
- case Hexagon::LDh_GP_cdnPt_V4:
- case Hexagon::LDh_GP_cdnNotPt_V4:
- case Hexagon::LDuh_GP_cdnPt_V4:
- case Hexagon::LDuh_GP_cdnNotPt_V4:
- case Hexagon::LDw_GP_cdnPt_V4:
- case Hexagon::LDw_GP_cdnNotPt_V4:
- case Hexagon::LDrid_GP_cdnPt_V4:
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- case Hexagon::LDrib_GP_cdnPt_V4:
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- case Hexagon::LDriub_GP_cdnPt_V4:
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- case Hexagon::LDrih_GP_cdnPt_V4:
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- case Hexagon::LDriuh_GP_cdnPt_V4:
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- case Hexagon::LDriw_GP_cdnPt_V4:
- case Hexagon::LDriw_GP_cdnNotPt_V4:
-
- case Hexagon::STrid_GP_cdnPt_V4:
- case Hexagon::STrid_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
- case Hexagon::STd_GP_cdnPt_V4:
- case Hexagon::STd_GP_cdnNotPt_V4:
- case Hexagon::STb_GP_cdnPt_V4:
- case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STh_GP_cdnPt_V4:
- case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STw_GP_cdnPt_V4:
- case Hexagon::STw_GP_cdnNotPt_V4:
- return true;
- }
- return false;
-}
-
static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
const HexagonInstrInfo *QII) {
assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
@@ -2883,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
// sense, i.e, either both should be negated or both should be none negated.
if (( predRegNumDst != predRegNumSrc) ||
- isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
return false;
}
@@ -2993,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC )
{
- // already a dot new instruction
- if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Already a dot new instruction.
+ if (QII->isDotNewInst(MI) && !IsNewifyStore(MI))
return false;
if (!isNewifiable(MI))
@@ -3009,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
else {
// Create a dot new machine instruction to see if resources can be
// allocated. If not, bail out now.
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
int NewOpcode = GetDotNewOp(MI->getOpcode());
const MCInstrDesc &desc = QII->get(NewOpcode);
DebugLoc dl;
@@ -3152,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
// !p0 is not complementary to p0.new
return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
(GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
- (isDotNewInst(MI1) == isDotNewInst(MI2)));
+ (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
}
// initPacketizerState - Initialize packetizer flags
@@ -3277,13 +2717,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
if (QRI->Subtarget.hasV4TOps()) {
-
- if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
Dependence = true;
return false;
}
- if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
|| (MCIDJ.mayStore() && QII->isMemOp(I))
|| (QII->isMemOp(J) && QII->isMemOp(I))) {
Dependence = true;
@@ -3580,7 +3020,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
MachineInstr *nvjMI = MII;
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
- if (QII->isExtended(MI) &&
+ if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
!tryAllocateResourcesForConstExt(MI)) {
endPacket(MBB, MI);
ResourceTracker->reserveResources(MI);
@@ -3600,7 +3040,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
&& (!tryAllocateResourcesForConstExt(nvjMI)
|| !ResourceTracker->canReserveResources(nvjMI)))
|| // For non-extended instruction, no need to allocate extra 4 bytes.
- (!QII->isExtended(nvjMI) &&
+ (!QII->isExtended(nvjMI) &&
!ResourceTracker->canReserveResources(nvjMI)))
{
endPacket(MBB, MI);
@@ -3616,7 +3056,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
CurrentPacketMIs.push_back(MI);
CurrentPacketMIs.push_back(nvjMI);
} else {
- if ( QII->isExtended(MI)
+ if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
&& ( !tryAllocateResourcesForConstExt(MI)
|| !ResourceTracker->canReserveResources(MI)))
{
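
The repeated create/probe/delete dance above checks whether the implicit constant-extender word (modeled by the IMMEXT_i pseudo) still fits in the current packet. The probe in isolation — names follow the patch, but treat this as a sketch of the idiom rather than the packetizer's exact code:

    #include "llvm/CodeGen/DFAPacketizer.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/MC/MCInstrDesc.h"

    // Build a throwaway pseudo, ask the DFA if it fits, then delete it.
    static bool canFitConstExtender(llvm::MachineFunction &MF,
                                    llvm::DFAPacketizer &RT,
                                    const llvm::MCInstrDesc &ImmExtDesc,
                                    llvm::DebugLoc DL) {
      llvm::MachineInstr *PseudoMI = MF.CreateMachineInstr(ImmExtDesc, DL);
      bool CanReserve = RT.canReserveResources(PseudoMI);
      MF.DeleteMachineInstr(PseudoMI);   // probe only; never inserted
      return CanReserve;
    }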
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 035afe88d5bc..36da6dfcc3d0 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
#include "HexagonInstPrinter.h"
-#include "HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -28,6 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
+const char HexagonInstPrinter::PacketPadding = '\t';
+
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
@@ -43,43 +45,42 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
StringRef Annot) {
- const char packetPadding[] = " ";
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
// Ending a hardware loop is different from ending a regular packet.
- assert(MI->isEndPacket() && "Loop end must also end the packet");
+ assert(MI->isPacketEnd() && "Loop-end must also end the packet");
- if (MI->isStartPacket()) {
+ if (MI->isPacketStart()) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
HexagonMCInst Nop;
StringRef NoAnnot;
Nop.setOpcode (Hexagon::NOP);
- Nop.setStartPacket (MI->isStartPacket());
+ Nop.setPacketStart (MI->isPacketStart());
printInst (&Nop, O, NoAnnot);
}
// Close the packet.
- if (MI->isEndPacket())
- O << packetPadding << endPacket;
+ if (MI->isPacketEnd())
+ O << PacketPadding << endPacket;
printInstruction(MI, O);
}
else {
// Prefix the insn opening the packet.
- if (MI->isStartPacket())
- O << packetPadding << startPacket << '\n';
+ if (MI->isPacketStart())
+ O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
// Suffix the insn closing the packet.
- if (MI->isEndPacket())
+ if (MI->isPacketEnd())
// Always suffix the packet on a new line, since the GNU assembler has
// issues with a closing brace on the same line as CONST{32,64}.
- O << '\n' << packetPadding << endPacket;
+ O << '\n' << PacketPadding << endPacket;
}
printAnnotation(O, Annot);
@@ -102,12 +103,23 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ O << MI->getOperand(OpNo).getImm();
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
}
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
+ if (HMCI->isConstExtended())
+ O << "#";
+ printOperand(MI, OpNo, O);
}
void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
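
printImmOperand now has to handle operands that are still symbolic: once constant extenders and fixups are in play, an "immediate" slot may carry an MCExpr rather than a literal. The dispatch, reduced to a helper built only from calls the patch itself uses:

    #include "llvm/MC/MCExpr.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"

    static void printImmOrExpr(const llvm::MCOperand &MO,
                               llvm::raw_ostream &O) {
      if (MO.isExpr())
        O << *MO.getExpr();   // symbolic: defer to MCExpr printing
      else if (MO.isImm())
        O << MO.getImm();     // resolved literal
      else
        llvm_unreachable("Unknown operand");
    }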
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
index 902a32352f1c..d0cef683da95 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -14,16 +14,18 @@
#ifndef HEXAGONINSTPRINTER_H
#define HEXAGONINSTPRINTER_H
-#include "HexagonMCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
+ class HexagonMCInst;
+
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
@@ -65,10 +67,19 @@ namespace llvm {
void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
{ printSymbol(MI, OpNo, O, false); }
- bool isConstExtended(const MCInst *MI) const;
+ const MCInstrInfo &getMII() const {
+ return MII;
+ }
+
protected:
void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
const;
+
+ static const char PacketPadding;
+
+ private:
+ const MCInstrInfo &MII;
+
};
} // end namespace llvm
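
Note the header now forward-declares HexagonMCInst instead of including HexagonMCInst.h; since the interface only mentions the type through pointers, the full definition is not needed and the include moves into the .cpp. The general pattern, with hypothetical names:

    namespace llvm {
      class HexagonMCInst;    // forward declaration: no full definition needed

      class ExamplePrinter {  // stand-in for a class like HexagonInstPrinter
      public:
        // A pointer parameter only requires the declaration above.
        void printInst(const HexagonMCInst *MI);
      };
    } // end namespace llvm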
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
index 8678401feee4..59849aa7e1c7 100644
--- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = HexagonAsmPrinter
parent = Hexagon
-required_libraries = MC Support
+required_libraries = HexagonDesc MC Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index 8e3da99404ee..62b9b600ce8e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMHexagonDesc
- HexagonMCTargetDesc.cpp
HexagonMCAsmInfo.cpp
+ HexagonMCInst.cpp
+ HexagonMCTargetDesc.cpp
)
add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 7221e906342e..d4a93b5c87a4 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -17,6 +17,9 @@
#ifndef HEXAGONBASEINFO_H
#define HEXAGONBASEINFO_H
+#include "HexagonMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
namespace llvm {
/// HexagonII - This namespace holds all of the target specific flags that
@@ -28,22 +31,50 @@ namespace HexagonII {
// Insn types.
// *** Must match HexagonInstrFormat*.td ***
enum Type {
- TypePSEUDO = 0,
- TypeALU32 = 1,
- TypeCR = 2,
- TypeJR = 3,
- TypeJ = 4,
- TypeLD = 5,
- TypeST = 6,
- TypeSYSTEM = 7,
- TypeXTYPE = 8,
- TypeMEMOP = 9,
- TypeNV = 10,
- TypePREFIX = 30, // Such as extenders.
- TypeMARKER = 31 // Such as end of a HW loop.
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeENDLOOP = 31 // Such as end of a HW loop.
};
+ enum SubTarget {
+ HasV2SubT = 0xf,
+ HasV2SubTOnly = 0x1,
+ NoV2SubT = 0x0,
+ HasV3SubT = 0xe,
+ HasV3SubTOnly = 0x2,
+ NoV3SubT = 0x1,
+ HasV4SubT = 0xc,
+ NoV4SubT = 0x3,
+ HasV5SubT = 0x8,
+ NoV5SubT = 0x7
+ };
+ enum AddrMode {
+ NoAddrMode = 0, // No addressing mode
+ Absolute = 1, // Absolute addressing mode
+ AbsoluteSet = 2, // Absolute set addressing mode
+ BaseImmOffset = 3, // Indirect with offset
+ BaseLongOffset = 4, // Indirect with long offset
+ BaseRegOffset = 5 // Indirect with register offset
+ };
+
+ enum MemAccessSize {
+ NoMemAccess = 0, // Not a memory access instruction.
+ ByteAccess = 1, // Byte access instruction (memb).
+ HalfWordAccess = 2, // Half word access instruction (memh).
+ WordAccess = 3, // Word access instruction (memw).
+ DoubleWordAccess = 4 // Double word access instruction (memd).
+ };
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
@@ -58,11 +89,93 @@ namespace HexagonII {
// Predicated instructions.
PredicatedPos = 6,
- PredicatedMask = 0x1
+ PredicatedMask = 0x1,
+ PredicatedFalsePos = 7,
+ PredicatedFalseMask = 0x1,
+ PredicatedNewPos = 8,
+ PredicatedNewMask = 0x1,
+
+ // New-Value consumer instructions.
+ NewValuePos = 9,
+ NewValueMask = 0x1,
+
+ // New-Value producer instructions.
+ hasNewValuePos = 10,
+ hasNewValueMask = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos = 11,
+ NewValueOpMask = 0x7,
+
+ // Which bits encode the new value.
+ NewValueBitsPos = 14,
+ NewValueBitsMask = 0x3,
+
+ // Stores that can become new-value stores.
+ mayNVStorePos = 16,
+ mayNVStoreMask = 0x1,
+
+ // New-value store instructions.
+ NVStorePos = 17,
+ NVStoreMask = 0x1,
+
+ // Extendable insns.
+ ExtendablePos = 18,
+ ExtendableMask = 0x1,
+
+ // Insns must be extended.
+ ExtendedPos = 19,
+ ExtendedMask = 0x1,
+
+ // Which operand may be extended.
+ ExtendableOpPos = 20,
+ ExtendableOpMask = 0x7,
+
+ // Signed or unsigned range.
+ ExtentSignedPos = 23,
+ ExtentSignedMask = 0x1,
+
+ // Number of bits of range before extending operand.
+ ExtentBitsPos = 24,
+ ExtentBitsMask = 0x1f,
+
+ // Valid subtargets
+ validSubTargetPos = 29,
+ validSubTargetMask = 0xf,
+
+ // Addressing mode for load/store instructions.
+ AddrModePos = 33,
+ AddrModeMask = 0x7,
+
+ // Access size of memory access instructions (load/store).
+ MemAccessSizePos = 36,
+ MemAccesSizeMask = 0x7
};
// *** The code above must match HexagonInstrFormat*.td *** //
+ // Hexagon specific MO operand flag mask.
+ enum HexagonMOTargetFlagVal {
+ //===------------------------------------------------------------------===//
+ // Hexagon Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ HMOTF_ConstExtended = 1,
+
+ /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation.
+ /// Used for computing a global address for PIC compilations.
+ MO_PCREL,
+
+ /// MO_GOT - Indicates a GOT-relative relocation.
+ MO_GOT,
+
+ // Low or high part of a symbol.
+ MO_LO16, MO_HI16,
+
+ // Offset from the base of the SDA.
+ MO_GPREL
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
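Each Pos/Mask pair in the TSFlags enum above describes one bit-field packed
into the 64-bit MCInstrDesc::TSFlags word, and every field is read the same
way: shift right by the position, then mask. A minimal sketch of the access
pattern, assuming the enum values above (the helper name is illustrative,
not part of this patch):

    // Extract the addressing mode that the .td files packed into TSFlags.
    static unsigned getAddrMode(uint64_t TSFlags) {
      return (TSFlags >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
    }

For example, a load encoded with BaseImmOffset (3) occupies bits [35:33] of
TSFlags, so getAddrMode() returns 3 for it.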
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 86f75d1c2d7a..3deb8d1deb42 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
AscizDirective = "\t.string\t";
WeakRefDirective = "\t.weak\t";
+ SupportsDebugInformation = true;
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
new file mode 100644
index 000000000000..9260b4a27661
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -0,0 +1,175 @@
+//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some Hexagon VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+using namespace llvm;
+
+// Return the slots used by the insn.
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
+ const HexagonInstrInfo* QII = TM->getInstrInfo();
+ const InstrItineraryData* II = TM->getInstrItineraryData();
+ const InstrStage*
+ IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+
+ return (IS->getUnits());
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInst::getType() const {
+ const uint64_t F = MCID->TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInst::isCanon() const {
+ return (!MCID->isPseudo() &&
+ !isPrefix() &&
+ getType() != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInst::isPrefix() const {
+ return (getType() == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInst::isSolo() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInst::isNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInst::hasNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return the operand that consumes or produces a new value.
+const MCOperand& HexagonMCInst::getNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ const unsigned O = (F >> HexagonII::NewValueOpPos) &
+ HexagonII::NewValueOpMask;
+ const MCOperand& MCO = getOperand(O);
+
+ assert ((isNewValue() || hasNewValue()) && MCO.isReg());
+ return (MCO);
+}
+
+// Return whether the instruction needs to be constant extended:
+// 1) Always return true if the instruction has the 'isExtended' flag set.
+// 2) For instructions with the 'isExtendable' flag:
+//    a) for immediate operands, return true only if the value is
+//       out-of-range;
+//    b) for global addresses, always return true.
+
+bool HexagonMCInst::isConstExtended(void) const {
+ if (isExtended())
+ return true;
+
+ if (!isExtendable())
+ return false;
+
+ short ExtOpNum = getCExtOpNum();
+ int MinValue = getMinValue();
+ int MaxValue = getMaxValue();
+ const MCOperand& MO = getOperand(ExtOpNum);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInst::isExtended(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInst::isExtendable(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return number of bits in the constant extended operand.
+unsigned HexagonMCInst::getBitCount(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInst::getCExtOpNum(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMinValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMaxValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
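
As a sanity check on the range arithmetic above (assuming the usual
two's-complement behavior of the shifts), for an 8-bit extendable operand:

    // Illustrative values only, for bits == 8:
    //   signed:   getMinValue() == -1 << 7     == -128
    //             getMaxValue() == ~(-1 << 7)  ==  127
    //   unsigned: getMinValue() == 0
    //             getMaxValue() == ~(-1 << 8)  ==  255

Any immediate outside this window makes isConstExtended() return true.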
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
new file mode 100644
index 000000000000..3ca71f00b241
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -0,0 +1,100 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "HexagonTargetMachine.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class HexagonMCInst: public MCInst {
+ // MCID is set during instruction lowering.
+ // It is needed in order to access TSFlags for
+ // use in checking MC instruction properties.
+ const MCInstrDesc *MCID;
+
+ // Packet start and end markers
+ unsigned packetStart: 1, packetEnd: 1;
+
+ public:
+ explicit HexagonMCInst():
+ MCInst(), MCID(0), packetStart(0), packetEnd(0) {};
+ HexagonMCInst(const MCInstrDesc& mcid):
+ MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
+
+ bool isPacketStart() const { return (packetStart); };
+ bool isPacketEnd() const { return (packetEnd); };
+ void setPacketStart(bool Y) { packetStart = Y; };
+ void setPacketEnd(bool Y) { packetEnd = Y; };
+ void resetPacket() { setPacketStart(false); setPacketEnd(false); };
+
+ // Return the slots used by the insn.
+ unsigned getUnits(const HexagonTargetMachine* TM) const;
+
+ // Return the Hexagon ISA class for the insn.
+ unsigned getType() const;
+
+ void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; };
+ const MCInstrDesc& getDesc(void) const { return *MCID; };
+
+ // Return whether the insn is an actual insn.
+ bool isCanon() const;
+
+ // Return whether the insn is a prefix.
+ bool isPrefix() const;
+
+ // Return whether the insn is solo, i.e., cannot be in a packet.
+ bool isSolo() const;
+
+ // Return whether the instruction needs to be constant extended.
+ bool isConstExtended() const;
+
+ // Return constant extended operand number.
+ unsigned short getCExtOpNum(void) const;
+
+ // Return whether the insn is a new-value consumer.
+ bool isNewValue() const;
+
+ // Return whether the instruction is a legal new-value producer.
+ bool hasNewValue() const;
+
+ // Return the operand that consumes or produces a new value.
+ const MCOperand& getNewValue() const;
+
+ // Return number of bits in the constant extended operand.
+ unsigned getBitCount(void) const;
+
+ private:
+ // Return whether the instruction must be always extended.
+ bool isExtended() const;
+
+ // Return true if the insn may be extended based on the operand value.
+ bool isExtendable() const;
+
+ // Return true if the operand can be constant extended.
+ bool isOperandExtended(const unsigned short OperandNum) const;
+
+ // Return the min value that a constant extendable operand can have
+ // without being extended.
+ int getMinValue() const;
+
+ // Return the max value that a constant extendable operand can have
+ // without being extended.
+ int getMaxValue() const;
+ };
+}
+
+#endif
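
The packetStart/packetEnd bits are meant to be set by whatever assembles the
VLIW bundle; a hypothetical sketch of bracketing one packet (the function and
container are illustrative, not code from this patch):

    // Mark the first and last instruction of one Hexagon packet.
    static void markPacket(SmallVectorImpl<HexagonMCInst> &Packet) {
      if (Packet.empty())
        return;
      Packet.front().setPacketStart(true);
      Packet.back().setPacketEnd(true);
    }

isPacketStart()/isPacketEnd() can then drive the printer's '{' and '}'
packet brackets.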
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 3cfa4fddd87c..6b1d2d161958 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
+#include "InstPrinter/HexagonInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 7aa5dd3b8980..40f6c8d23ea8 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8995080974cc..c06e8bc3cdbe 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index 813767ba6d65..4a7d8e8d8887 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -2,7 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMBlazeAsmParser
- MBlazeAsmLexer.cpp
MBlazeAsmParser.cpp
)
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
deleted file mode 100644
index 59a1ed97d3d4..000000000000
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/MBlazeBaseInfo.h"
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-
-#include "llvm/Support/TargetRegistry.h"
-
-#include <string>
-#include <map>
-
-using namespace llvm;
-
-namespace {
-
- class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
- protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
- public:
- MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
- };
-
- class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
- public:
- MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI)
- : MBlazeBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
- };
-}
-
-AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return AsmToken(lexedToken);
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID) {
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- } else {
- return AsmToken(lexedToken);
- }
- }
- }
-}
-
-extern "C" void LLVMInitializeMBlazeAsmLexer() {
- RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
-}
-
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index f7809caeb32f..dda6e247ac4f 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -8,18 +8,18 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
namespace {
@@ -35,7 +35,8 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseRegister();
+ MBlazeOperand *ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc);
MBlazeOperand *ParseImmediate();
MBlazeOperand *ParseFsl();
MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -81,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -383,23 +390,31 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- return (ParseRegister(RegNo) == 0);
+ MBlazeOperand *Reg = ParseRegister(StartLoc, EndLoc);
+ if (!Reg)
+ return true;
+ RegNo = Reg->getReg();
+ return false;
}
-MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
- SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+MBlazeOperand *MBlazeAsmParser::ParseRegister() {
+ SMLoc S, E;
+ return ParseRegister(S, E);
+}
- switch (getLexer().getKind()) {
- default: return 0;
- case AsmToken::Identifier:
- RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
- if (RegNo == 0)
- return 0;
+MBlazeOperand *MBlazeAsmParser::ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
- getLexer().Lex();
- return MBlazeOperand::CreateReg(RegNo, S, E);
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return 0;
+
+ unsigned RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, StartLoc, EndLoc);
}
static unsigned MatchFslRegister(StringRef String) {
@@ -415,7 +430,7 @@ static unsigned MatchFslRegister(StringRef String) {
MBlazeOperand *MBlazeAsmParser::ParseFsl() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
switch (getLexer().getKind()) {
default: return 0;
@@ -432,7 +447,7 @@ MBlazeOperand *MBlazeAsmParser::ParseFsl() {
MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
const MCExpr *EVal;
switch (getLexer().getKind()) {
@@ -442,7 +457,7 @@ MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::Identifier:
- if (getParser().ParseExpression(EVal))
+ if (getParser().parseExpression(EVal))
return 0;
return MBlazeOperand::CreateImm(EVal, S, E);
@@ -454,8 +469,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
MBlazeOperand *Op;
// Attempt to parse the next token as a register name
- unsigned RegNo;
- Op = ParseRegister(RegNo);
+ Op = ParseRegister();
// Attempt to parse the next token as an FSL immediate
if (!Op)
@@ -529,10 +543,10 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -548,12 +562,9 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-extern "C" void LLVMInitializeMBlazeAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmParser() {
RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
- LLVMInitializeMBlazeAsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 0bf93d71dab8..91a41f39b5d8 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 6b958c85eebf..c03ab3803b60 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -12,10 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeDisassembler.h"
-
-#include "llvm/MC/EDInstInfo.h"
+#include "MBlaze.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -26,7 +24,6 @@
// #include "MBlazeGenDecoderTables.inc"
// #include "MBlazeGenRegisterNames.inc"
-#include "MBlazeGenEDInfo.inc"
namespace llvm {
extern const MCInstrDesc MBlazeInsts[];
@@ -492,10 +489,6 @@ static unsigned getOPCODE(uint32_t insn) {
}
}
-const EDInstInfo *MBlazeDisassembler::getEDInfo() const {
- return instInfoMBlaze;
-}
-
//
// Public interface for the disassembler
//
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index 5c4ae3b1ace8..b8ff8f607265 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -23,8 +23,6 @@ class MCInst;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
class MBlazeDisassembler : public MCDisassembler {
public:
@@ -44,9 +42,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
index a1f1dbc7a23b..fc2b3d51b44c 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "MBlaze.h"
#include "MBlazeInstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MBlaze.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index b679a318c3e0..7dafaef0af08 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -15,32 +15,32 @@
#define DEBUG_TYPE "mblaze-asm-printer"
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
#include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
-#include "MBlazeMachineFunction.h"
#include "MBlazeMCInstLower.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 19e787d8622d..3d0d1cecd1f1 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -16,14 +16,14 @@
#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index 9e467bf337e0..172304bd5b45 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -14,21 +14,21 @@
#define DEBUG_TYPE "mblaze-frame-lowering"
#include "MBlazeFrameLowering.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
#include "MBlazeInstrInfo.h"
#include "MBlazeMachineFunction.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -426,6 +426,45 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have a frame pointer, turn the adjcallstackdown instruction into
+ // 'addi r1, r1, -<amt>' and the adjcallstackup instruction into
+ // 'addi r1, r1, <amt>'.
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+
void MBlazeFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
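The rounding inside eliminateCallFramePseudoInstr above keeps the outgoing
argument area stack-aligned. A tiny check of the expression (illustrative
helper, not patch code):

    // Round Amount up to the next multiple of Align.
    static unsigned roundUp(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }
    // roundUp(10, 8) == 16; roundUp(16, 8) == 16

So a 10-byte argument area with 8-byte stack alignment reserves 16 bytes,
while an already-aligned amount is left unchanged.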
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 01e6578a352f..f4228c5f0890 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -39,6 +39,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 6b4349766f37..78ad24debb1b 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -17,21 +17,21 @@
#include "MBlazeRegisterInfo.h"
#include "MBlazeSubtarget.h"
#include "MBlazeTargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 310c25e839c3..d4f943297acb 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -15,14 +15,9 @@
#define DEBUG_TYPE "mblaze-lower"
#include "MBlazeISelLowering.h"
#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
#include "MBlazeTargetMachine.h"
#include "MBlazeTargetObjectFile.h"
-#include "MBlazeSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +25,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
@@ -159,7 +160,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// Operations not directly supported by MBlaze.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
@@ -1027,15 +1029,17 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ RetOps.push_back(DAG.getRegister(Reg, MVT::i32));
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1048,20 +1052,16 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // If this function is using the interrupt_handler calling convention
- // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
- unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
- : MBlazeISD::Ret;
- unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
- : MBlaze::R15;
- SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+ return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
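The rewritten LowerReturn follows the variadic return-node pattern: gather
the chain, the link register, one register operand per copied return value,
and the optional glue into a single list, then build the node once. Reduced
to its shape (names as in the diff above; a sketch, not extra patch code):

    SmallVector<SDValue, 4> RetOps(1, Chain);          // operand 0: chain
    RetOps.push_back(DAG.getRegister(Reg, MVT::i32));  // R14 or R15
    // ...one DAG.getRegister(VA.getLocReg(), ...) per return value...
    if (Flag.getNode())
      RetOps.push_back(Flag);                          // glue, if any
    return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size());

The SDNPVariadic flag added to MBlazeRet/MBlazeIRet below is what allows
these nodes to take the variable-length operand list.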
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index a01fab567c8a..f6b4095a93dc 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -17,8 +17,8 @@
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b5025fc8ee6c..79449f73f74e 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
#include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 139bf7156a69..f86bc0b0b5a4 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -28,9 +28,9 @@ def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
[SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 1c2e3b26613e..8d262a01e706 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "MBlazeIntrinsicInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstring>
using namespace llvm;
@@ -104,7 +104,7 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
- AttrListPtr AList = getAttributes(M->getContext(),
+ AttributeSet AList = getAttributes(M->getContext(),
(mblazeIntrinsic::ID) IntrID);
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
getType(M->getContext(), IntrID),
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index 6b9f42ec91a6..ad414ac40fd7 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -14,19 +14,19 @@
#include "MBlazeMCInstLower.h"
#include "MBlazeInstrInfo.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
MCSymbol *MBlazeMCInstLower::
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 95cc5077cc16..10d507f37bbc 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -16,8 +16,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index daa76e887fca..bd83afc1cc83 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -16,25 +16,25 @@
#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "MBlazeMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "MBlazeGenRegisterInfo.inc"
@@ -83,67 +83,21 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
-void MBlazeRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have a frame pointer, turn the adjcallstackup instruction into a
- // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
- // 'addi r1, r1, <amt>'
- MachineInstr *Old = I;
- int Amount = Old->getOperand(0).getImm() + 4;
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- MachineInstr *New;
- if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(-Amount);
- } else {
- assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
-
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned oi = i == 2 ? 1 : 2;
+ unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2;
DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n";
dbgs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int stackSize = MFI->getStackSize();
int spOffset = MFI->getObjectOffset(FrameIndex);
@@ -159,16 +113,16 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// as explained on LowerFormalArguments, detect negative offsets
// and adjust SPOffsets considering the final stack size.
int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
- Offset += MI.getOperand(oi).getImm();
+ Offset += MI.getOperand(OFIOperandNum).getImm();
DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(oi).ChangeToImmediate(Offset);
- MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
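The signature change above mirrors a target-independent API update: the
generic frame-index replacement code now tells each target which operand is
the frame index, so the per-target scan that each backend used to carry is
gone. In outline (both halves are visible in the hunk above):

    // Before: each target located the frame-index operand itself.
    unsigned i = 0;
    while (!MI.getOperand(i).isFI()) ++i;
    // After: the caller passes FIOperandNum, and the target uses
    // MI.getOperand(FIOperandNum) directly.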
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 1d5116293516..497f3866c9ca 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -50,15 +50,13 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index eb375046f218..ed43d21f30c5 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -14,8 +14,8 @@
#ifndef MBLAZESUBTARGET_H
#define MBLAZESUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index f180652f1127..bcdd32fed947 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -13,8 +13,8 @@
#include "MBlazeTargetMachine.h"
#include "MBlaze.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,8 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index a8df4e63e3ee..956794dddaf9 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -14,17 +14,16 @@
#ifndef MBLAZE_TARGETMACHINE_H
#define MBLAZE_TARGETMACHINE_H
-#include "MBlazeSubtarget.h"
-#include "MBlazeInstrInfo.h"
+#include "MBlazeFrameLowering.h"
#include "MBlazeISelLowering.h"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeInstrInfo.h"
#include "MBlazeIntrinsicInfo.h"
-#include "MBlazeFrameLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,8 +37,6 @@ namespace llvm {
MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -75,11 +72,6 @@ namespace llvm {
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const
- { return &STTI; }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const
- { return &VTTI; }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 899c74ee8ed7..a7a0a68b1612 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -9,14 +9,14 @@
#include "MBlazeTargetObjectFile.h"
#include "MBlazeSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
void MBlazeTargetObjectFile::
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 44feeb49e7f1..6f9752c42951 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -8,9 +8,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
@@ -18,7 +19,6 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -54,7 +54,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -88,7 +88,7 @@ bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME: Is this right? It's what the "generic" code was doing before,
// but is X86 specific. Is it actually true for MBlaze also, or was it
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index 2b71d9d3c844..8faff6ade441 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 9a7549b0e7cf..380750d50f4c 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "MBlazeMCTargetDesc.h"
-#include "MBlazeMCAsmInfo.h"
#include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index 83c2a7d34da1..512ce9a08103 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -15,8 +15,7 @@ BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \
MBlazeGenAsmWriter.inc \
MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
- MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \
- MBlazeGenEDInfo.inc
+ MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
index 71210d8db466..323a7f647d56 100644
--- a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 0930c453e954..4b12aeadd3e4 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
#include "MSP430InstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MSP430.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 2e328cb5d6ac..3c9576056946 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
- PointerSize = 2;
+ PointerSize = CalleeSaveStackSlotSize = 2;
PrivateGlobalPrefix = ".L";
WeakRefDirective ="\t.weak\t";
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index c455f6bc24f2..530e6aae92fd 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCTargetDesc.h"
-#include "MSP430MCAsmInfo.h"
#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 86bc183c1bdf..0a04e5ddb75d 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -14,26 +14,26 @@
#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
+#include "InstPrinter/MSP430InstPrinter.h"
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
-#include "InstPrinter/MSP430InstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index bdeb0c590f2d..f128427f8066 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -18,11 +18,11 @@
#define DEBUG_TYPE "msp430-branch-select"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index ad27cc9122a8..b448cc4ed9b8 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -24,6 +24,9 @@ def RetCC_MSP430 : CallingConv<[
// MSP430 Argument Calling Conventions
//===----------------------------------------------------------------------===//
def CC_MSP430 : CallingConv<[
+ // Pass by value if the byval attribute is given
+ CCIfByVal<CCPassByVal<2, 2>>,
+
// Promote i8 arguments to i16.
CCIfType<[i8], CCPromoteToType<i16>>,
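The new CCIfByVal<CCPassByVal<2, 2>> rule routes any argument carrying the byval attribute onto the stack with 2-byte size and alignment granularity, before the promotion rules run. At the C level the effect looks like this (a hypothetical example, not taken from the patch):

    // 'p' carries the byval attribute in IR: the caller copies the whole
    // aggregate into its outgoing-argument area (2-byte aligned) and the
    // callee receives the address of that in-stack copy.
    struct Pair { int a; int b; };   // int is 16 bits on MSP430

    int sum(struct Pair p);

    int caller(void) {
      struct Pair p = {1, 2};
      return sum(p);   // lowered via the CCPassByVal rule above
    }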
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2e170f17bf9d..e504011dfdc8 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -14,15 +14,15 @@
#include "MSP430FrameLowering.h"
#include "MSP430InstrInfo.h"
#include "MSP430MachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -222,13 +222,73 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+void MSP430FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ unsigned StackAlign = getStackAlignment();
+
+ if (!hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackdown instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
- if (TFI->hasFP(MF)) {
+ if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
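The rounding step inside eliminateCallFramePseudoInstr is the standard round-up-to-alignment idiom. Isolated as a self-contained sketch:

    #include <cassert>
    #include <cstdint>

    // Round Amount up to the next multiple of StackAlign; the same
    // expression used in eliminateCallFramePseudoInstr above.
    uint64_t roundUpToAlignment(uint64_t Amount, uint64_t StackAlign) {
      assert(StackAlign != 0 && "alignment must be non-zero");
      return (Amount + StackAlign - 1) / StackAlign * StackAlign;
    }
    // roundUpToAlignment(5, 4) == 8, roundUpToAlignment(8, 4) == 8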
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index cb02545852b5..c673f59b5efc 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -35,6 +35,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -46,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 5efc6a36b894..1566c096037e 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -13,22 +13,22 @@
#include "MSP430.h"
#include "MSP430TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38ef..09cdf3268553 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -16,14 +16,8 @@
#include "MSP430ISelLowering.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430TargetMachine.h"
#include "MSP430Subtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
+#include "MSP430TargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +26,12 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,6 +164,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
setOperationAction(ISD::SREM, MVT::i16, Expand);
+ // varargs support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
// Libcalls names.
if (HWMultMode == HWMultIntr) {
setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
@@ -192,6 +198,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -297,7 +304,6 @@ MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// LowerCCCArguments - transform physical registers into virtual registers and
/// generate load operations for arguments placed on the stack.
// FIXME: struct return stuff
-// FIXME: varargs
SDValue
MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -311,6 +317,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -318,7 +325,11 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
- assert(!isVarArg && "Varargs not supported yet");
+ // Create frame index for the start of the first vararg value
+ if (isVarArg) {
+ unsigned Offset = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, Offset, true));
+ }
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -357,22 +368,34 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
} else {
// Sanity check
assert(VA.isMemLoc());
- // Load the argument to a virtual register
- unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
- if (ObjSize > 2) {
- errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString()
- << "\n";
+
+ SDValue InVal;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), true);
+ InVal = DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
}
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- //from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+
+ InVals.push_back(InVal);
}
}
@@ -400,15 +423,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
  // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -421,16 +437,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
    // Guarantee that all emitted copies are stuck together,
    // avoiding anything scheduled in between them.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(Opc, dl, MVT::Other, Chain);
+ return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size());
}
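The rewritten LowerReturn feeds the return node a full operand list instead of just the chain: the chain first, one register operand per return value, and the glue last when present; this is also why the RET_FLAG/RETI_FLAG nodes gain SDNPVariadic in MSP430InstrInfo.td below. A schematic of the list being assembled (plain C++, not the SelectionDAG API):

    #include <string>
    #include <vector>

    // Shape of the RetOps vector built above:
    //   [0]    chain
    //   [1..]  one operand per copied return register (e.g. R12, R13)
    //   [last] optional glue tying the copies to the return
    std::vector<std::string> buildRetOps(const std::vector<std::string> &Regs,
                                         bool HasGlue) {
      std::vector<std::string> RetOps(1, "chain");
      RetOps.insert(RetOps.end(), Regs.begin(), Regs.end());
      if (HasGlue)
        RetOps.push_back("glue");
      return RetOps;
    }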
/// LowerCCCCallTo - function arguments are copied from virtual regs to
@@ -498,9 +517,23 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset()));
+ SDValue MemOp;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
+ MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile*/false,
+ /*AlwaysInline=*/true,
+ MachinePointerInfo(),
+ MachinePointerInfo());
+ } else {
+ MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),
+ false, false, 0);
+ }
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false, 0));
+ MemOpChains.push_back(MemOp);
}
}
@@ -931,6 +964,22 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+
+ // Frame index of first vararg argument
+ SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Create a store of the frame index to the location operand
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), FrameIndex,
+ Op.getOperand(1), MachinePointerInfo(SV),
+ false, false, 0);
+}
+
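Taken together, the frame index created in LowerCCCArguments and the store emitted by LowerVASTART implement the usual stack-walking va_list. A hypothetical C usage that this patch makes lowerable for MSP430:

    #include <stdarg.h>

    // All anonymous arguments live on the stack, so va_start (lowered by
    // LowerVASTART above) stores the frame address of the first vararg
    // into 'ap'; va_arg then walks the stack (ISD::VAARG is expanded
    // generically, per the setOperationAction calls above).
    int sum_n(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, int);
      va_end(ap);        // ISD::VAEND expands to nothing
      return s;
    }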
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -1010,6 +1059,10 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
}
+bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 991304c23de3..e0ed870f5653 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,7 +73,7 @@ namespace llvm {
public:
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -92,6 +92,7 @@ namespace llvm {
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
@@ -115,6 +116,7 @@ namespace llvm {
/// out to 16 bits.
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index be332f05b30b..a6b5f2f6d0bd 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -15,10 +15,10 @@
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index f003574eda00..e45780d05803 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,9 +40,9 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index b1773fba7e92..043e5becadbb 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCInstLower.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -20,10 +21,9 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 632d6dee275f..d1697f478cc2 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -30,6 +30,9 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
@@ -41,6 +44,9 @@ public:
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9ae238f66f57..0b3e9e259649 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,14 +17,14 @@
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/ErrorHandling.h"
#define GET_REGINFO_TARGET_DESC
#include "MSP430GenRegisterInfo.inc"
@@ -101,83 +101,18 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &MSP430::GR16RegClass;
}
-void MSP430RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub SPW, <amt>' and the
- // adjcallstackdown instruction into 'add SPW, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- } else {
- assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
- // factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
- if (Amount)
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- }
-
- if (New) {
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
- } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
- MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -191,7 +126,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += 2; // Skip the saved FPW
// Fold imm into offset
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
if (MI.getOpcode() == MSP430::ADD16ri) {
// This is actually "load effective address" of the stack slot
@@ -199,7 +134,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// expand it into mov + add
MI.setDesc(TII.get(MSP430::MOV16rr));
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
if (Offset == 0)
return;
@@ -216,8 +151,8 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
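The eliminateFrameIndex signature change moves the operand scan out of each target: generic frame-index elimination now locates the frame-index operand and passes its position in as FIOperandNum. For reference, the deleted loop amounted to this (a sketch against the MachineInstr query API):

    #include "llvm/CodeGen/MachineInstr.h"
    #include <cassert>
    using namespace llvm;

    // What each target used to repeat before this change: scan the
    // instruction for its frame-index operand.
    static unsigned findFrameIndexOperand(const MachineInstr &MI) {
      unsigned i = 0;
      while (!MI.getOperand(i).isFI()) {
        ++i;
        assert(i < MI.getNumOperands() &&
               "Instr doesn't have FrameIndex operand!");
      }
      return i;
    }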
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 64a43bcafbb4..69cccb275259 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -42,12 +42,9 @@ public:
const TargetRegisterClass*
getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 13e37b373533..164e351df952 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -13,9 +13,9 @@
#include "MSP430TargetMachine.h"
#include "MSP430.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { }
+ FrameLowering(Subtarget) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 186172ede428..be695a211109 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,16 +15,15 @@
#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
#define LLVM_TARGET_MSP430_TARGETMACHINE_H
-#include "MSP430InstrInfo.h"
-#include "MSP430ISelLowering.h"
#include "MSP430FrameLowering.h"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430InstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
#include "MSP430Subtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -64,12 +61,6 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 8b3e01ecf52c..0d71d04ebe22 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 539a1f723bdd..edfd421d8532 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/Mangler.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 67b524883cf8..c403f216b0d6 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,11 +13,11 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/TargetRegistry.h"
@@ -84,15 +84,33 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterClass);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
- int tryParseRegister(StringRef Mnemonic);
+ int tryParseRegister(bool is64BitReg);
bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -104,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions);
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+                     bool isLoad, bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
@@ -118,6 +139,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetAssignment();
+
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool isMips64() const {
@@ -128,9 +153,11 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
- int matchRegisterName(StringRef Symbol);
+ int matchRegisterName(StringRef Symbol, bool is64BitReg);
- int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic);
+ int matchCPURegisterName(StringRef Symbol);
+
+ int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
void setFpFormat(FpFormatTy Format) {
FpFormat = Format;
@@ -146,7 +173,10 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
- unsigned getATReg();
+ int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -166,6 +196,20 @@ namespace {
/// instruction.
class MipsOperand : public MCParsedAsmOperand {
+public:
+ enum RegisterKind {
+ Kind_None,
+ Kind_CPURegs,
+ Kind_CPU64Regs,
+ Kind_HWRegs,
+ Kind_HW64Regs,
+ Kind_FGR32Regs,
+ Kind_FGR64Regs,
+ Kind_AFGR64Regs,
+ Kind_CCRRegs
+ };
+
+private:
enum KindTy {
k_CondCode,
k_CoprocNum,
@@ -178,24 +222,30 @@ class MipsOperand : public MCParsedAsmOperand {
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
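Naming the union members (Token, RegOp, ImmOp, MemOp) keeps MipsOperand a plain tagged union while letting RegOp carry the new RegisterKind. The pattern in miniature (illustrative, not the actual class):

    // Minimal tagged-union analogue of MipsOperand after this change.
    struct OperandSketch {
      enum KindTy { k_Register, k_Immediate } Kind;

      struct RegOp { unsigned RegNum; unsigned RegKind; };
      struct ImmOp { long Val; };

      union {
        RegOp Reg;
        ImmOp Imm;
      };
    };

    OperandSketch makeReg(unsigned N, unsigned RK) {
      OperandSketch Op;
      Op.Kind = OperandSketch::k_Register;
      Op.Reg.RegNum = N;
      Op.Reg.RegKind = RK;   // what setRegKind() records
      return Op;
    }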
@@ -246,6 +296,11 @@ public:
return Reg.RegNum;
}
+ void setRegKind(RegisterKind RegKind) {
+ assert((Kind == k_Register) && "Invalid access!");
+ Reg.Kind = RegKind;
+ }
+
const MCExpr *getImm() const {
assert((Kind == k_Immediate) && "Invalid access!");
return Imm.Val;
@@ -296,6 +351,45 @@ public:
return Op;
}
+ bool isCPURegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPURegs;
+ }
+ void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCPU64RegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPU64Regs;
+ }
+ void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHWRegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HWRegs;
+ }
+ void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHW64RegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HW64Regs;
+ }
+ void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ void addCCRAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCCRAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_CCRRegs;
+ }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -307,6 +401,56 @@ public:
};
}
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ Inst.setLoc(IDLoc);
+ if (MCID.mayLoad() || MCID.mayStore()) {
+    // Check the offset of the memory operand; if it is a symbol
+    // reference or an immediate, we may have to expand the instruction.
+    for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ if (MemOffset < -32768 || MemOffset > 32767) {
+            // Offset can't exceed a signed 16-bit value.
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
+ return false;
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() == MCExpr::SymbolRef){
+ const MCSymbolRefExpr *SR =
+ static_cast<const MCSymbolRefExpr*>(Expr);
+ if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+ // Expand symbol
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (needsExpansion(Inst))
+ expandInstruction(Inst, IDLoc, Instructions);
+ else
+ Instructions.push_back(Inst);
+
+ return false;
+}
+
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch(Inst.getOpcode()) {
@@ -344,31 +488,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
if ( 0 <= ImmValue && ImmValue <= 65535) {
// for 0 <= j <= 65535.
// li d,j => ori d,$zero,j
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ( ImmValue < 0 && ImmValue >= -32768) {
// for -32768 <= j < 0.
// li d,j => addiu d,$zero,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
// for any other value of j that is representable as a 32-bit integer.
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
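The three cases above are the classic MIPS 'li' pseudo-expansion, now always emitted with the 32-bit opcodes. A compact model of the selection logic (prints the would-be assembly; the register name is hypothetical):

    #include <cstdint>
    #include <cstdio>

    // li d, j expands to one of:
    //   ori d, $zero, j                       for 0 <= j <= 65535
    //   addiu d, $zero, j                     for -32768 <= j < 0
    //   lui d, hi16(j); ori d, d, lo16(j)     otherwise
    void expandLi(int32_t Imm) {
      if (Imm >= 0 && Imm <= 65535) {
        printf("ori   $d, $zero, %d\n", Imm);
      } else if (Imm >= -32768 && Imm < 0) {
        printf("addiu $d, $zero, %d\n", Imm);
      } else {
        printf("lui   $d, 0x%x\n", ((uint32_t)Imm & 0xffff0000u) >> 16);
        printf("ori   $d, $d, 0x%x\n", (uint32_t)Imm & 0xffffu);
      }
    }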
@@ -390,7 +534,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
if ( -32768 <= ImmValue && ImmValue <= 65535) {
//for -32768 <= j <= 65535.
//la d,j(s) => addiu d,s,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
@@ -400,12 +544,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
//la d,j(s) => lui d,hi16(j)
// ori d,d,lo16(j)
// addu d,d,s
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -433,19 +577,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
//for any other value of j that is representable as a 32-bit integer.
//la d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -453,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
}
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+                                  bool isLoad, bool isImmOpnd) {
+ const MCSymbolRefExpr *SR;
+ MCInst TempInst;
+  unsigned ImmOffset, HiOffset, LoOffset;
+ const MCExpr *ExprOffset;
+ unsigned TmpRegNum;
+  unsigned AtRegNum = getReg(isMips64() ? Mips::CPU64RegsRegClassID :
+ Mips::CPURegsRegClassID,
+ getATReg());
+  // The 1st operand is either the source or the destination register.
+  assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+  unsigned RegOpNum = Inst.getOperand(0).getReg();
+  // The 2nd operand is the base register.
+  assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+  unsigned BaseRegNum = Inst.getOperand(1).getReg();
+  // The 3rd operand is either an immediate or an expression.
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm();
+ LoOffset = ImmOffset & 0x0000ffff;
+ HiOffset = (ImmOffset & 0xffff0000) >> 16;
+  // If the MSB of LoOffset is 1 (negative number), we must increment HiOffset.
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions will have the same location
+ TempInst.setLoc(IDLoc);
+  // The 1st instruction in the expansion is LUi. For a load we can use
+  // the dst register as a temporary if base and dst are different,
+  // but for stores we must use $at.
+  TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+  // Add the instruction to the list
+  Instructions.push_back(TempInst);
+  // and prepare TempInst for the next instruction,
+  TempInst.clear();
+  // which adds the temp register to the base.
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+  // and finally, create the original instruction with the low part
+  // of the offset and the new base.
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
+
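When a memory offset does not fit the signed 16-bit field, expandMemInst rewrites one load or store into three instructions, carry-adjusting the high half because the low half is consumed as a signed immediate. A model of the arithmetic and the resulting shape (register numbers hypothetical):

    #include <cstdint>
    #include <cstdio>

    // e.g. lw $2, 0x18000($3) becomes:
    //   lui  $2, 0x2         # hi16 + carry, since lo16 is negative
    //   addu $2, $2, $3      # temp = hi part + base
    //   lw   $2, 0x8000($2)  # original opcode with the lo16 offset
    void expandLoad(uint32_t Offset) {
      uint32_t Lo = Offset & 0xffff;
      uint32_t Hi = (Offset >> 16) + ((Lo & 0x8000) ? 1 : 0);
      printf("lui  $2, 0x%x\n", Hi);
      printf("addu $2, $2, $3\n");
      printf("lw   $2, 0x%x($2)\n", Lo);
    }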
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success: {
- if (needsExpansion(Inst)) {
- SmallVector<MCInst, 4> Instructions;
- expandInstruction(Inst, IDLoc, Instructions);
- for(unsigned i =0; i < Instructions.size(); i++){
- Out.EmitInstruction(Instructions[i]);
- }
- } else {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
- }
+      if (processInstruction(Inst, IDLoc, Instructions))
+        return true;
+      for (unsigned i = 0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
return false;
}
case Match_MissingFeature:
@@ -498,84 +717,72 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-int MipsAsmParser::matchRegisterName(StringRef Name) {
-
+int MipsAsmParser::matchCPURegisterName(StringRef Name) {
int CC;
- if (!isMips64())
+
+ if (Name == "at")
+ return getATReg();
+
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO)
- .Case("a0", Mips::A0)
- .Case("a1", Mips::A1)
- .Case("a2", Mips::A2)
- .Case("a3", Mips::A3)
- .Case("v0", Mips::V0)
- .Case("v1", Mips::V1)
- .Case("s0", Mips::S0)
- .Case("s1", Mips::S1)
- .Case("s2", Mips::S2)
- .Case("s3", Mips::S3)
- .Case("s4", Mips::S4)
- .Case("s5", Mips::S5)
- .Case("s6", Mips::S6)
- .Case("s7", Mips::S7)
- .Case("k0", Mips::K0)
- .Case("k1", Mips::K1)
- .Case("sp", Mips::SP)
- .Case("fp", Mips::FP)
- .Case("gp", Mips::GP)
- .Case("ra", Mips::RA)
- .Case("t0", Mips::T0)
- .Case("t1", Mips::T1)
- .Case("t2", Mips::T2)
- .Case("t3", Mips::T3)
- .Case("t4", Mips::T4)
- .Case("t5", Mips::T5)
- .Case("t6", Mips::T6)
- .Case("t7", Mips::T7)
- .Case("t8", Mips::T8)
- .Case("t9", Mips::T9)
- .Case("at", Mips::AT)
- .Case("fcc0", Mips::FCC0)
- .Default(-1);
- else
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
+
+  // Although the SGI documentation simply drops t0-t3 for n32/n64,
+  // GNU as maps t0-t3 onto the o32/o64 encodings of t4-t7.
+  // We support both cases, so for t0-t3 we just push them up to t4-t7.
+ if (isMips64() && 8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1 && isMips64())
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO_64)
- .Case("at", Mips::AT_64)
- .Case("v0", Mips::V0_64)
- .Case("v1", Mips::V1_64)
- .Case("a0", Mips::A0_64)
- .Case("a1", Mips::A1_64)
- .Case("a2", Mips::A2_64)
- .Case("a3", Mips::A3_64)
- .Case("a4", Mips::T0_64)
- .Case("a5", Mips::T1_64)
- .Case("a6", Mips::T2_64)
- .Case("a7", Mips::T3_64)
- .Case("t4", Mips::T4_64)
- .Case("t5", Mips::T5_64)
- .Case("t6", Mips::T6_64)
- .Case("t7", Mips::T7_64)
- .Case("s0", Mips::S0_64)
- .Case("s1", Mips::S1_64)
- .Case("s2", Mips::S2_64)
- .Case("s3", Mips::S3_64)
- .Case("s4", Mips::S4_64)
- .Case("s5", Mips::S5_64)
- .Case("s6", Mips::S6_64)
- .Case("s7", Mips::S7_64)
- .Case("t8", Mips::T8_64)
- .Case("t9", Mips::T9_64)
- .Case("kt0", Mips::K0_64)
- .Case("kt1", Mips::K1_64)
- .Case("gp", Mips::GP_64)
- .Case("sp", Mips::SP_64)
- .Case("fp", Mips::FP_64)
- .Case("s8", Mips::FP_64)
- .Case("ra", Mips::RA_64)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
.Default(-1);
+ return CC;
+}
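The numeric table plus the conditional bump encode the o32/n64 naming overlap described in the comment; in isolation the remap is just (a sketch):

    // On n32/n64, the o32 names t0..t3 (regs 8..11) are kept for
    // compatibility but refer to regs 12..15 (o32's t4..t7), while
    // a4..a7 take over 8..11, matching GNU as.
    int remapTempRegs(int CC, bool IsMips64) {
      if (IsMips64 && CC >= 8 && CC <= 11)
        CC += 4;    // t0..t3 -> the t4..t7 encodings
      return CC;
    }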
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+
+ int CC;
+ CC = matchCPURegisterName(Name);
if (CC != -1)
- return CC;
+    return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID
+                                                : Mips::CPURegsRegClassID);
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
@@ -639,70 +846,44 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-unsigned MipsAsmParser::getATReg() {
- unsigned Reg = Options.getATRegNum();
- if (isMips64())
- return getReg(Mips::CPU64RegsRegClassID,Reg);
-
- return getReg(Mips::CPURegsRegClassID,Reg);
+int MipsAsmParser::getATReg() {
+ return Options.getATRegNum();
}
unsigned MipsAsmParser::getReg(int RC,int RegNo) {
return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
}
-int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) {
-
- if (Mnemonic.lower() == "rdhwr") {
- // at the moment only hwreg29 is supported
- if (RegNum != 29)
- return -1;
- return Mips::HWR29;
- }
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
if (RegNum > 31)
return -1;
- // MIPS64 registers are numbered 1 after the 32-bit equivalents
- return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64();
+ return getReg(RegClass, RegNum);
}
-int MipsAsmParser::tryParseRegister(StringRef Mnemonic) {
+int MipsAsmParser::tryParseRegister(bool is64BitReg) {
const AsmToken &Tok = Parser.getTok();
int RegNum = -1;
if (Tok.is(AsmToken::Identifier)) {
std::string lowerCase = Tok.getString().lower();
- RegNum = matchRegisterName(lowerCase);
+ RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- Mnemonic.lower());
- else
- return RegNum; //error
- // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64
- if (isMips64() && RegNum == Mips::ZERO_64) {
- if (Mnemonic.find("ddiv") != StringRef::npos)
- RegNum = Mips::ZERO;
- }
+ is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
return RegNum;
}
bool MipsAsmParser::
tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic){
+ bool is64BitReg){
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
- // FIXME: we should make a more generic method for CCR
- if ((Mnemonic == "cfc1" || Mnemonic == "ctc1")
- && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){
- RegNo = Parser.getTok().getIntVal(); // get the int value
- // at the moment only fcc0 is supported
- if (RegNo == 0)
- RegNo = Mips::FCC0;
- } else
- RegNo = tryParseRegister(Mnemonic);
+ RegNo = tryParseRegister(is64BitReg);
if (RegNo == -1)
return true;
@@ -734,7 +915,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat dollar token.
// parse register operand
- if (!tryParseRegisterOperand(Operands, Mnemonic)) {
+ if (!tryParseRegisterOperand(Operands, isMips64())) {
if (getLexer().is(AsmToken::LParen)) {
// check if it is indexed addressing operand
Operands.push_back(MipsOperand::CreateToken("(", S));
@@ -743,7 +924,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
Parser.Lex(); // eat dollar
- if (tryParseRegisterOperand(Operands, Mnemonic))
+ if (tryParseRegisterOperand(Operands, isMips64()))
return true;
if (!getLexer().is(AsmToken::RParen))
@@ -757,7 +938,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
}
// maybe it is a symbol reference
StringRef Identifier;
- if (Parser.ParseIdentifier(Identifier))
+ if (Parser.parseIdentifier(Identifier))
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -772,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Identifier:
+    // Look for an existing symbol; we may need to assign the
+    // proper RegisterKind.
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+      return false;
+    // else fall through to expression parsing
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -780,7 +966,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
// quoted label names
const MCExpr *IdVal;
SMLoc S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
@@ -832,7 +1018,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
} else
break;
}
- if (getParser().ParseParenExpression(IdVal,EndLoc))
+ if (getParser().parseParenExpression(IdVal,EndLoc))
return true;
while (getLexer().getKind() == AsmToken::RParen)
@@ -843,19 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
// Check the type of the expression
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // it's a constant, evaluate lo or hi value
- int Val = MCE->getValue();
+ // It's a constant, evaluate lo or hi value
if (Str == "lo") {
- Val = Val & 0xffff;
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
} else if (Str == "hi") {
+ int Val = MCE->getValue();
+ int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate
+ if (LoSign)
+ Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
}
- Res = MCConstantExpr::Create(Val, getContext());
return false;
}
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // it's a symbol, create symbolic expression from symbol
+ // It's a symbol, create symbolic expression from symbol
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
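The constant case now evaluates %lo as a signed 16-bit quantity and carry-adjusts %hi, so that (%hi << 16) + %lo reconstructs the original value. In isolation (a sketch):

    #include <cstdint>

    int16_t evalLo(int32_t V) { return (int16_t)V; }

    int32_t evalHi(int32_t V) {
      int32_t Hi = (V >> 16) & 0xffff;
      if (V & 0x8000)    // %lo is negative; compensate
        ++Hi;
      return Hi;
    }

    // Worked example: V = 0x12348765 gives %hi = 0x1235 and
    // %lo = -30875, and 0x12350000 + (-30875) == 0x12348765.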
@@ -868,7 +1060,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = tryParseRegister("");
+ RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -880,10 +1072,11 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
switch(getLexer().getKind()) {
default:
return true;
+ case AsmToken::Identifier:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
- return (getParser().ParseExpression(Res));
+ return (getParser().parseExpression(Res));
case AsmToken::Percent:
return parseRelocOperand(Res);
case AsmToken::LParen:
@@ -907,7 +1100,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1);
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
@@ -920,7 +1113,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok1 = Parser.getTok(); // get next token
if (Tok1.is(AsmToken::Dollar)) {
Parser.Lex(); // Eat '$' token.
- if (tryParseRegisterOperand(Operands,"")) {
+ if (tryParseRegisterOperand(Operands, isMips64())) {
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return MatchOperand_ParseFail;
}
@@ -954,6 +1147,180 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+  if (!tryParseRegisterOperand(Operands, true)) {
+ // set the proper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPU64Regs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
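parseCPU64Regs above, and the parseCPURegs/parseHWRegs/parseHW64Regs/parseCCRRegs functions that follow, all honor the tri-state contract the TableGen-generated matcher expects. A minimal self-contained sketch of that convention (only the MatchOperand_* names come from LLVM; the rest is invented for illustration):

enum OperandMatchResultTy {
  MatchOperand_Success,   // an operand was appended to Operands
  MatchOperand_NoMatch,   // this kind does not apply; try other parsers
  MatchOperand_ParseFail  // right kind, malformed input: diagnose and stop
};

OperandMatchResultTy parseSomeRegClass(bool KindApplies, bool WellFormed) {
  if (!KindApplies)          // e.g. parseCPU64Regs on a 32-bit subtarget
    return MatchOperand_NoMatch;
  if (!WellFormed)           // e.g. parseHWRegs seeing a register other than 29
    return MatchOperand_ParseFail;
  return MatchOperand_Success;
}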
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind) {
+
+ MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ if (Sym) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *Expr;
+ if (Sym->isVariable())
+ Expr = Sym->getVariableValue();
+ else
+ return false;
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const StringRef DefSymbol = Ref->getSymbol().getName();
+ if (DefSymbol.startswith("$")) {
+ // Look up the register with the corresponding name.
+ int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+ if (RegNum > -1) {
+ Parser.Lex();
+ MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ } else if (Expr->getKind() == MCExpr::Constant) {
+ Parser.Lex();
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+ MipsOperand *op = MipsOperand::CreateImm(Const, S,
+ Parser.getTok().getLoc());
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ return false;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if (!tryParseRegisterOperand(Operands, false)) {
+ // Set the proper register kind.
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPURegs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (isMips64())
+ return MatchOperand_NoMatch;
+
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // At the moment only hwreg29 is supported.
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // At the moment only hwreg29 is supported.
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HW64Regs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ unsigned RegNum;
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.is(AsmToken::Integer)) {
+ RegNum = Tok.getIntVal();
+ // At the moment only fcc0 is supported.
+ if (RegNum != 0)
+ return MatchOperand_ParseFail;
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // At the moment only fcc0 is supported.
+ if (Tok.getIdentifier() != "fcc0")
+ return MatchOperand_ParseFail;
+ } else
+ return MatchOperand_NoMatch;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_CCRRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK
@@ -1023,13 +1390,13 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Read the first operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1041,14 +1408,14 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1059,16 +1426,18 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef Mnemonic;
// floating point instructions: should register be treated as double?
if (requestsDoubleOperand(Name)) {
setFpFormat(FP_FORMAT_D);
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+ Mnemonic = Name;
}
else {
setDefaultFpFormat();
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
- StringRef Mnemonic = Name.slice(Start, Next);
+ Mnemonic = Name.slice(Start, Next);
Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
@@ -1108,9 +1477,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Name)) {
+ if (ParseOperand(Operands, Mnemonic)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1120,7 +1489,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -1128,7 +1497,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1138,18 +1507,18 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // line should look like:
+ // Line should look like:
// .set noat
// set at reg to 0
Options.setATReg(0);
// eat noat
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1161,28 +1530,39 @@ bool MipsAsmParser::parseSetAtDirective() {
// line can be
// .set at - defaults to $1
// or .set at=$reg
+ int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
Options.setATReg(1);
Parser.Lex(); // Consume the EndOfStatement
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); //eat '='
+ getParser().Lex(); // Eat '='
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // eat '$'
- if (getLexer().isNot(AsmToken::Integer)) {
+ Parser.Lex(); // Eat '$'
+ const AsmToken &Reg = Parser.getTok();
+ if (Reg.is(AsmToken::Identifier)) {
+ AtRegNo = matchCPURegisterName(Reg.getIdentifier());
+ } else if (Reg.is(AsmToken::Integer)) {
+ AtRegNo = Reg.getIntVal();
+ } else {
reportParseError("unexpected token in statement");
return false;
}
- const AsmToken &Reg = Parser.getTok();
- if (!Options.setATReg(Reg.getIntVal())) {
+
+ if (AtRegNo < 1 || AtRegNo > 31) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ if (!Options.setATReg(AtRegNo)) {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); //eat reg
+ getParser().Lex(); // Eat reg
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
@@ -1198,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1247,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
}
+
+bool MipsAsmParser::parseSetAssignment() {
+ StringRef Name;
+ const MCExpr *Value;
+
+ if (Parser.parseIdentifier(Name))
+ return reportParseError("expected identifier after .set");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return reportParseError("unexpected token in .set directive");
+ Lex(); // Eat comma.
+
+ if (Parser.parseExpression(Value))
+ return reportParseError("expected valid expression after comma");
+
+ // Check whether the Name already exists as a symbol.
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ return reportParseError("symbol already defined");
+ }
+ Sym = getContext().GetOrCreateSymbol(Name);
+ Sym->setVariableValue(Value);
+
+ return false;
+}
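For orientation, here is a sketch of what the MC layer holds after ".set foo, 4"; Ctx is an assumed stand-in for the parser's MCContext, and every call mirrors one made in the code above:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

void recordSetAssignment(llvm::MCContext &Ctx) {
  // ".set foo, 4" becomes a variable symbol holding a constant expression.
  llvm::MCSymbol *Sym = Ctx.GetOrCreateSymbol("foo");
  Sym->setVariableValue(llvm::MCConstantExpr::Create(4, Ctx));
  // A later use of "foo" is found via Ctx.LookupSymbol("foo"); since the
  // value has kind MCExpr::Constant, searchSymbolAlias() above turns it into
  // an immediate operand. ".set gp, $28" is stored the same way, but as a
  // symbol reference whose name starts with '$', which resolves to a register.
}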
bool MipsAsmParser::parseDirectiveSet() {
// get next token
@@ -1266,55 +1671,92 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "nomips16") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
} else if (Tok.getString() == "nomicromips") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ // It is just an identifier; look for an assignment.
+ parseSetAssignment();
return false;
}
+
return true;
}
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
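A hedged illustration of what the loop above does for a concrete directive; Out and Ctx are assumed stand-ins for the parser's streamer and context:

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"

void emitWordPair(llvm::MCStreamer &Out, llvm::MCContext &Ctx) {
  // ".word 0x11223344, 5" runs the loop twice; on a little-endian target
  // this emits the bytes 44 33 22 11 05 00 00 00 into the current section.
  Out.EmitValue(llvm::MCConstantExpr::Create(0x11223344, Ctx), 4);
  Out.EmitValue(llvm::MCConstantExpr::Create(5, Ctx), 4);
}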
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
- if (DirectiveID.getString() == ".ent") {
+ StringRef IDVal = DirectiveID.getString();
+
+ if ( IDVal == ".ent") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".end") {
+ if (IDVal == ".end") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".frame") {
+ if (IDVal == ".frame") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".set") {
+ if (IDVal == ".set") {
return parseDirectiveSet();
}
- if (DirectiveID.getString() == ".fmask") {
+ if (IDVal == ".fmask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".mask") {
+ if (IDVal == ".mask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".gpword") {
+ if (IDVal == ".gpword") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".word") {
+ parseDirectiveWord(4, DirectiveID.getLoc());
return false;
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ef56e752b2e4..cf8bb189e475 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
add_public_tablegen_target(MipsCommonTableGen)
@@ -17,10 +16,13 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
+ MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
@@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index 048ad0ddac5b..7101c06d12ac 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/Mips/Disassembler/Makefile b/lib/Target/Mips/Disassembler/Makefile
index a78feba1f8df..7900373dd2b2 100644
--- a/lib/Target/Mips/Disassembler/Makefile
+++ b/lib/Target/Mips/Disassembler/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===##
+##===- lib/Target/Mips/Disassembler/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 82dbcc5bcf7d..59e49d8ddc6c 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -12,18 +12,15 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "MipsRegisterInfo.h"
-#include "llvm/MC/EDInstInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-
-#include "MipsGenEDInfo.inc"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -42,9 +39,6 @@ public:
virtual ~MipsDisassemblerBase() {}
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-
const MCRegisterInfo *getRegInfo() const { return RegInfo; }
private:
@@ -92,10 +86,6 @@ public:
} // end anonymous namespace
-const EDInstInfo *MipsDisassemblerBase::getEDInfo() const {
- return instInfoMips;
-}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
@@ -103,6 +93,11 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -143,10 +138,10 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
@@ -332,6 +327,15 @@ static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
}
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return MCDisassembler::Fail;
+}
+
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -480,14 +484,14 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 68d3ac5f3bd0..fc23cd380352 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define PRINT_ALIAS_INSTR
#include "MipsGenAsmWriter.inc"
const char* Mips::MipsFCCToString(Mips::CondCode CC) {
@@ -78,7 +79,9 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\t.set\tmips32r2\n";
}
- printInstruction(MI, O);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
switch (MI->getOpcode()) {
@@ -149,6 +152,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
OS << ')';
}
+void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNo).getReg());
+}
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 3d8a6f918ff6..d1b561f9764e 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -87,6 +87,9 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index be5d7e42532a..4212c94a5578 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,8 @@ add_llvm_library(LLVMMipsDesc
MipsMCCodeEmitter.cpp
MipsMCTargetDesc.cpp
MipsELFObjectWriter.cpp
+ MipsReginfo.cpp
+ MipsELFStreamer.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index c078794899d2..0b13607a572d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -37,6 +37,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_4:
case FK_Data_8:
case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPREL16:
case Mips::fixup_Mips_GPOFF_HI:
case Mips::fixup_Mips_GPOFF_LO:
case Mips::fixup_Mips_GOT_PAGE:
@@ -213,7 +214,7 @@ public:
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
assert(0 && "RelaxInstruction() unimplemented");
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 94e0d20d8835..7a55efd5c330 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -121,99 +121,6 @@ namespace MipsII {
};
}
-
-/// getMipsRegisterNumbering - Given the enum value for some register,
-/// return the number that it corresponds to.
-inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
-{
- switch (RegEnum) {
- case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0: case Mips::FCC0: case Mips::AC0:
- return 0;
- case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
- case Mips::AC1:
- return 1;
- case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1: case Mips::AC2:
- return 2;
- case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
- case Mips::AC3:
- return 3;
- case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
- case Mips::D2:
- return 4;
- case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
- return 5;
- case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
- case Mips::D3:
- return 6;
- case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
- return 7;
- case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
- case Mips::D4:
- return 8;
- case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
- return 9;
- case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
- case Mips::D5:
- return 10;
- case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
- return 11;
- case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
- case Mips::D6:
- return 12;
- case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
- return 13;
- case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
- case Mips::D7:
- return 14;
- case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
- return 15;
- case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
- case Mips::D8:
- return 16;
- case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
- return 17;
- case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
- case Mips::D9:
- return 18;
- case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
- return 19;
- case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
- case Mips::D10:
- return 20;
- case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
- return 21;
- case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
- case Mips::D11:
- return 22;
- case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
- return 23;
- case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
- case Mips::D12:
- return 24;
- case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
- return 25;
- case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
- case Mips::D13:
- return 26;
- case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
- return 27;
- case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
- case Mips::D14:
- return 28;
- case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
- case Mips::HWR29:
- return 29;
- case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
- return 30;
- case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
- return 31;
- default: llvm_unreachable("Unknown register number!");
- }
-}
-
inline static std::pair<const MCSymbolRefExpr*, int64_t>
MipsGetSymAndOffset(const MCFixup &Fixup) {
MCFixupKind FixupKind = Fixup.getKind();
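The deleted switch is subsumed by the HWEncoding values TableGen already attaches to each register definition; a sketch of the one-line replacement (see the MipsMCCodeEmitter hunk further down):

#include "llvm/MC/MCRegisterInfo.h"

// Equivalent of the old getMipsRegisterNumbering(): e.g. Mips::SP,
// Mips::F29 and Mips::HWR29 all carry the encoding value 29.
unsigned encodeReg(const llvm::MCRegisterInfo &MRI, unsigned Reg) {
  return MRI.getEncodingValue(Reg);
}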
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index f82e203c23ca..6471b51583ce 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -42,7 +42,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -61,19 +60,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
MipsELFObjectWriter::~MipsELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Subtarget class.
-unsigned MipsELFObjectWriter::getEFlags() const {
-
- // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
- unsigned Flag = ELF::EF_MIPS_NOREORDER;
-
- if (is64Bit())
- Flag |= ELF::EF_MIPS_ARCH_64R2;
- else
- Flag |= ELF::EF_MIPS_ARCH_32R2;
- return Flag;
-}
-
const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -108,7 +94,13 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
Type = ELF::R_MIPS_64;
break;
case FK_GPRel_4:
- Type = ELF::R_MIPS_GPREL32;
+ if (isN64()) {
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
+ }
+ else
+ Type = ELF::R_MIPS_GPREL32;
break;
case Mips::fixup_Mips_GPREL16:
Type = ELF::R_MIPS_GPREL16;
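On N64, a single Elf64 relocation record carries up to three 8-bit relocation codes packed into r_type. The setRType helpers used above are defined elsewhere in the Mips MC layer, but plausibly look like this hypothetical sketch:

// r_type layout on N64: bits 0-7 first reloc, 8-15 second, 16-23 third.
static unsigned setRType(unsigned Value, unsigned Type) {
  return (Type & 0xffffff00) | Value;
}
static unsigned setRType2(unsigned Value, unsigned Type) {
  return (Type & 0xffff00ff) | (Value << 8);
}
static unsigned setRType3(unsigned Value, unsigned Type) {
  return (Type & 0xff00ffff) | (Value << 16);
}
// R_MIPS_GPREL32 (12), R_MIPS_64 (18) and R_MIPS_NONE (0) compose to
// 0x0000120c, which the writer stores as one rela entry.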
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
new file mode 100644
index 000000000000..c33bc9ae3034
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -0,0 +1,89 @@
+//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "MipsSubtarget.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
+ RelaxAll, NoExecStack);
+ return S;
+ }
+
+ // For llc. Set a group of ELF header flags
+ void
+ MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
+
+ if (hasRawTextSupport())
+ return;
+
+ // Update e_header flags
+ MCAssembler& MCA = getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ MCA.setELFHeaderEFlags(EFlags);
+ }
+
+ // For llc. Set a symbol's STO flags
+ void
+ MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val) {
+
+ if (hasRawTextSupport())
+ return;
+
+ MCSymbolData &Data = getOrCreateSymbolData(Sym);
+ // The "other" values are stored in the last 6 bits of the second byte
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+ }
+
+} // namespace llvm
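The Val >> 2 above is easy to misread: setOther() stores only the upper six bits of st_other, so callers pre-shift. A hedged usage sketch, assuming the flag is LLVM's STO_MIPS_MIPS16 (value 0xf0):

#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/Support/ELF.h"

void markMips16(llvm::MCSymbolData &Data) {
  // STO_MIPS_MIPS16 occupies the top nibble of st_other (0xf0); since
  // setOther() packs six bits, the value arrives pre-shifted by two.
  unsigned Val = llvm::ELF::STO_MIPS_MIPS16;
  llvm::MCELF::setOther(Data, Val >> 2); // stores 0x3c
}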
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
new file mode 100644
index 000000000000..b10ccc78e665
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -0,0 +1,43 @@
+//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#ifndef MIPSELFSTREAMER_H_
+#define MIPSELFSTREAMER_H_
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MipsAsmPrinter;
+class MipsSubtarget;
+class MCSymbol;
+
+class MipsELFStreamer : public MCELFStreamer {
+public:
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack)
+ : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
+ }
+
+ ~MipsELFStreamer() {}
+ void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
+ void emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val);
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MipsELFStreamer;
+ }
+};
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif /* MIPSELFSTREAMER_H_ */
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 9d67aa1856e3..5d4b32d30578 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -24,6 +24,11 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
(TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
+ if ((TheTriple.getArch() == Triple::mips64el) ||
+ (TheTriple.getArch() == Triple::mips64)) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
@@ -34,7 +39,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
WeakRefDirective = "\t.weak\t";
-
+ DebugLabelSuffix = "=.";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index da1e4552c9d0..e198a7c983f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,11 +34,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
+ MCContext &Ctx;
bool IsLittleEndian;
public:
- MipsMCCodeEmitter(const MCInstrInfo &mcii, bool IsLittle) :
- MCII(mcii), IsLittleEndian(IsLittle) {}
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
+ const MCSubtargetInfo &sti, bool IsLittle) :
+ MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
@@ -93,7 +96,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, false);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, false);
}
MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
@@ -101,7 +104,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, true);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, true);
}
/// EncodeInstruction - Emit the instruction.
@@ -139,12 +142,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
llvm_unreachable("unimplemented opcode in EncodeInstruction()");
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
- uint64_t TSFlags = Desc.TSFlags;
-
- // Pseudo instructions don't get encoded and shouldn't be here
- // in the first place!
- if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
- llvm_unreachable("Pseudo opcode found in EncodeInstruction()");
// Get byte count of instruction
unsigned Size = Desc.getSize();
@@ -163,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
@@ -182,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm()>>2;
+
assert(MO.isExpr() &&
"getJumpTargetOpValue expects only expressions or an immediate");
@@ -200,7 +199,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getMipsRegisterNumbering(Reg);
+ unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
return RegNo;
} else if (MO.isImm()) {
return static_cast<unsigned>(MO.getImm());
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index f634f082be5a..be83b54b6124 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,15 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMCAsmInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "MipsMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -131,7 +132,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeMipsTargetMC() {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
new file mode 100644
index 000000000000..1dc9bcb36a5f
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
@@ -0,0 +1,80 @@
+//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// .reginfo
+// Elf32_Word ri_gprmask
+// Elf32_Word ri_cprmask[4]
+// Elf32_Word ri_gp_value
+//
+// .MIPS.options - N64
+// Elf64_Byte kind (ODK_REGINFO)
+// Elf64_Byte size (40 bytes)
+// Elf64_Section section (0)
+// Elf64_Word info (unused)
+// Elf64_Word ri_gprmask ()
+// Elf64_Word ri_pad ()
+// Elf64_Word[4] ri_cprmask ()
+// Elf64_Addr ri_gp_value ()
+//
+// .MIPS.options - N32
+// Elf32_Byte kind (ODK_REGINFO)
+// Elf32_Byte size (36 bytes)
+// Elf32_Section section (0)
+// Elf32_Word info (unused)
+// Elf32_Word ri_gprmask ()
+// Elf32_Word ri_pad ()
+// Elf32_Word[4] ri_cprmask ()
+// Elf32_Addr ri_gp_value ()
+//
+//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsReginfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetObjectFile.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// Integrated assembler version
+void
+MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const
+{
+
+ if (OS.hasRawTextSupport())
+ return;
+
+ const MipsTargetObjectFile &TLOFELF =
+ static_cast<const MipsTargetObjectFile &>(TLOF);
+ OS.SwitchSection(TLOFELF.getReginfoSection());
+
+ // .reginfo
+ if (MST.isABI_O32()) {
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 4); // ri_gp_value
+ }
+ // .MIPS.options
+ else if (MST.isABI_N64()) {
+ OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(40, 1); // size
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
+ }
+ else llvm_unreachable("Unsupported abi for reginfo");
+}
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
new file mode 100644
index 000000000000..039b8eaaf287
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
@@ -0,0 +1,31 @@
+//=== MipsReginfo.h - MipsReginfo -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGINFO_H
+#define MIPSREGINFO_H
+
+namespace llvm {
+ class MCStreamer;
+ class TargetLoweringObjectFile;
+ class MipsSubtarget;
+
+ class MipsReginfo {
+ void anchor();
+ public:
+ MipsReginfo() {}
+
+ void emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const;
+ };
+
+} // namespace llvm
+
+#endif
+
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index bd8c5173454e..bcf951e861b0 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
- MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
+ MipsGenDisassemblerTables.inc \
MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 2963f7e7fa42..8c65bb4020b5 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -27,6 +27,7 @@ namespace llvm {
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
+ FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 90c01d5de0a9..eefb02a494ca 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,32 +44,29 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
-def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
- "Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
"Enable 'signext in register' instructions.">;
def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
"Enable 'conditional move' instructions.">;
-def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
- "Enable 'multiply add/sub' instructions.">;
-def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
- "Enable 'min/max' instructions.">;
def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
+def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
+ "Enable 'FP indexed load/store' instructions.">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg, FeatureSwap]>;
+ [FeatureMips32, FeatureSEInReg, FeatureSwap,
+ FeatureFPIdx]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureGP64Bit, FeatureFP64Bit,
- FeatureMips32]>;
+ FeatureMips32, FeatureFPIdx]>;
def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
@@ -81,6 +78,9 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
+ "microMips mode">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 4e6b21feb55d..1bb6fe46295b 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -12,25 +12,26 @@
//===----------------------------------------------------------------------===//
#include "Mips16FrameLowering.h"
-#include "MipsInstrInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "Mips16InstrInfo.h"
+#include "MipsInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -38,9 +39,35 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
// Adjust stack.
- if (isInt<16>(-StackSize))
- BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+ TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP, -8);
+ SrcML = MachineLocation(Mips::S1);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -12);
+ SrcML = MachineLocation(Mips::S0);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -4);
+ SrcML = MachineLocation(Mips::RA);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
if (hasFP(MF))
BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
@@ -52,8 +79,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -65,9 +92,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(Mips::S0);
// Adjust stack.
- if (isInt<16>(StackSize))
- // assumes stacksize multiple of 8
- BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+ // Assumes the stack size is a multiple of 8.
+ TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI);
}
bool Mips16FrameLowering::
@@ -113,6 +139,25 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void Mips16FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
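The only subtle part above is the sign convention: ADJCALLSTACKDOWN allocates the outgoing-argument area by moving SP down, and ADJCALLSTACKUP releases it. Restated as a sketch (not patch code):

#include <cstdint>

// Delta handed to adjustStackPtr() for a call-frame pseudo of size Amount.
int64_t callFrameDelta(bool IsAdjCallStackDown, int64_t Amount) {
  return IsAdjCallStackDown ? -Amount : Amount;
}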
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 01db71e8def5..54fdb7871466 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,13 +20,17 @@ namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, 8) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..00b3449300c5
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
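The MVT::Glue results threaded through selectMULT keep the HI/LO reads pinned to the multiply, since any intervening mult/div would clobber the HI/LO unit. The emitted Mips16 sequence for a both-halves multiply looks roughly like this (register numbers illustrative):

// mult $2, $3   // MultRxRy16: writes HI and LO, produces glue
// mflo $4       // Mflo16: reads LO, consumes the glue from mult
// mfhi $5       // Mfhi16: reads HI, consumes the glue from mflo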
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Output the instructions required to put the
+/// SP into a Mips16 accessible aliased register.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+}
+
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // If Address is a FrameIndex, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // On PIC code, load the GlobalAddress (wrapped in MipsISD::Wrapper).
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not handled by custom lowering. Used for
+/// expanded, promoted and normal instructions.
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ //
+ // Instruction selection not handled by the auto-generated tablegen
+ // selection should be handled here.
+ //
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
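+ // Expansion sketch: recompute the carry/borrow produced by the earlier
+ // (ADD|SUB)C/E with an unsigned set-on-less-than (sltu), fold it into
+ // RHS, and feed that sum to the final addu/subu selected below.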
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 000000000000..baa85877d957
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 000000000000..23eb5375ac94
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.cpp - Mips16 DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("Enable mips16 hard float."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Don't expand conditional move related "
+ "pseudos for Mips16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // Set up as if mips32, then revert so we can test the mechanism
+ // for switching.
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or returned values placed in floating point
+// registers, we use a set of helper functions. (This includes functions
+// which return type complex, which on Mips are returned in a pair of
+// floating point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In the Mips traditional O32 and N32 ABIs, floating point numbers are
+// passed in floating point argument registers 1 and 2 only when the first
+// and optionally the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values, and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf),
+// (df, df); the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// The suffix can be calculated as follows:
+// for a given argument Arg:
+// Arg1x, Arg2x = 1 : Arg is sf
+// 2 : Arg is df
+// 0 : Arg is neither sf nor df
+// The stub is then the string for the number Arg1x + Arg2x*4.
+// Not all numbers between 0 and 10 are possible; we check anyway and
+// assert if an impossible one occurs.
+//
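+// For example, a signature of (df, sf) gives Arg1x = 2 and Arg2x = 1, so
+// the stub number is 2 + 1*4 == 6, matching the list above.
+//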
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >=2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type:
+// return type: float sf_
+// double df_
+// single complex sc_
+// double complex dc_
+// others NO PREFIX
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return type dependent prefix + stub number.
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently, but my main purpose is to not waste runtime on
+// something that we can enumerate in the source. Another possibility is to
+// have a python script generate these mapping tables. This will do for
+// now. There is a whole series of helper function mapping arrays, one for
+// each return type class as outlined above; each has 11 possible entries,
+// and entries with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for the one used when the return type
+// is neither sf, df, sc, nor dc; there we only care about cases which have
+// sf or df as the first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
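+
+// Illustration of the scheme above: a callee returning double with
+// arguments (sf, df) has stub number 1 + 2*4 == 9, so the helper looked up
+// in dfMips16Helper[9] is "__mips16_call_stub_df_9".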
+
+
+const char* Mips16TargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // Check whether it's a complex type (two matching float or double fields).
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // Currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier, so we assume that we don't know what kind it is and
+ // generate the helper.
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper)
+ Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or if it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc))
+ .addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc))
+ .addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 000000000000..b23e2a1f37db
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELLOWERING_H
+#define MIPS16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MIPS16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 61602b62fb44..4ff62ef3b6f9 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -29,45 +29,13 @@
//
//===----------------------------------------------------------------------===//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-//
-class Format16<bits<5> val> {
- bits<5> Value = val;
-}
-
-def Pseudo16 : Format16<0>;
-def FrmI16 : Format16<1>;
-def FrmRI16 : Format16<2>;
-def FrmRR16 : Format16<3>;
-def FrmRRI16 : Format16<4>;
-def FrmRRR16 : Format16<5>;
-def FrmRRI_A16 : Format16<6>;
-def FrmSHIFT16 : Format16<7>;
-def FrmI8_TYPE16 : Format16<8>;
-def FrmI8_MOVR3216 : Format16<9>;
-def FrmI8_MOV32R16 : Format16<10>;
-def FrmI8_SVRS16 : Format16<11>;
-def FrmJAL16 : Format16<12>;
-def FrmJALX16 : Format16<13>;
-def FrmEXT_I16 : Format16<14>;
-def FrmASMACRO16 : Format16<15>;
-def FrmEXT_RI16 : Format16<16>;
-def FrmEXT_RRI16 : Format16<17>;
-def FrmEXT_RRI_A16 : Format16<18>;
-def FrmEXT_SHIFT16 : Format16<19>;
-def FrmEXT_I816 : Format16<20>;
-def FrmEXT_I8_SVRS16 : Format16<21>;
-def FrmOther16 : Format16<22>; // Instruction w/ a custom format
// Base class for Mips 16 Format
// This class does not depend on the instruction size
//
class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>: Instruction
+ InstrItinClass itin>: Instruction
{
- Format16 Form = f;
let Namespace = "Mips";
@@ -78,14 +46,6 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
- //
- // Attributes specific to Mips instructions...
- //
- bits<5> FormBits = Form.Value;
-
- // TSFlags layout should be kept in sync with MipsInstrInfo.h.
- let TSFlags{4-0} = FormBits;
-
let Predicates = [InMips16Mode];
}
@@ -93,30 +53,35 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
// Generic Mips 16 Format
//
class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<16> Inst;
bits<5> Opcode = 0;
// Top 5 bits are the 'opcode' field
let Inst{15-11} = Opcode;
+
+ let Size=2;
+ field bits<16> SoftFail = 0;
}
//
// For 32 bit extended instruction forms.
//
class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<32> Inst;
-
+
+ let Size=4;
+ field bits<32> SoftFail = 0;
}
class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
let Inst{31-27} = 0b11110;
}
@@ -125,7 +90,7 @@ class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst16<outs, ins, asmstr, pattern, IIPseudo, Pseudo16> {
+ MipsInst16<outs, ins, asmstr, pattern, IIPseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
@@ -137,7 +102,7 @@ class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<11> imm11;
@@ -152,7 +117,7 @@ class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<8> imm8;
@@ -169,7 +134,7 @@ class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -188,7 +153,7 @@ class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
//
class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
string asmstr, list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> subfunct;
@@ -208,7 +173,7 @@ class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
//
class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<6> _code; // code is a keyword in tablegen
bits<5> funct;
@@ -226,7 +191,7 @@ class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<1> nd;
@@ -252,7 +217,7 @@ class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -272,7 +237,7 @@ class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -294,7 +259,7 @@ class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI_A16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -316,7 +281,7 @@ class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmSHIFT16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -338,7 +303,7 @@ class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_TYPE16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
bits<8> imm8;
@@ -356,7 +321,7 @@ class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
class FI8_MOVR3216<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOVR3216>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<4> ry;
@@ -378,7 +343,7 @@ class FI8_MOVR3216<dag outs, dag ins, string asmstr,
class FI8_MOV32R16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOV32R16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
@@ -402,7 +367,7 @@ class FI8_MOV32R16<dag outs, dag ins, string asmstr,
class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<1> s;
bits<1> ra = 0;
@@ -429,7 +394,7 @@ class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, FrmJAL16>
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
bits<1> X;
bits<26> imm26;
@@ -452,7 +417,7 @@ class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> eop;
@@ -474,7 +439,7 @@ class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
class FASMACRO16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmASMACRO16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> select;
bits<3> p4;
@@ -503,7 +468,7 @@ class FASMACRO16<dag outs, dag ins, string asmstr,
class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> op;
@@ -527,7 +492,7 @@ class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<5> op;
bits<16> imm16;
@@ -552,7 +517,7 @@ class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI_A16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<15> imm15;
bits<3> rx;
@@ -578,7 +543,7 @@ class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_SHIFT16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<6> sa6;
bits<3> rx;
@@ -605,7 +570,7 @@ class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I816>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> I8;
@@ -630,7 +595,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> xsregs =0;
bits<8> framesize =0;
@@ -659,5 +624,3 @@ class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
}
-
-
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 619646b3178a..17dd2c07967a 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -12,18 +12,29 @@
//===----------------------------------------------------------------------===//
#include "Mips16InstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
using namespace llvm;
+static cl::opt<bool> NeverUseSaveRestore(
+ "mips16-never-use-save-restore",
+ cl::init(false),
+ cl::desc("For testing ability to adjust stack pointer "
+ "without save/restore instruction"),
+ cl::Hidden);
+
+
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm, Mips::BimmX16),
RI(*tm.getSubtargetImpl(), *this) {}
@@ -87,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void Mips16InstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -99,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::SwRxSpImmX16;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void Mips16InstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -115,13 +125,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
Opc = Mips::LwRxSpImmX16;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
-
switch(MI->getDesc().getOpcode()) {
default:
return false;
@@ -160,20 +169,215 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
return 0;
}
+// Adjust SP by FrameSize bytes. Save RA, S0, S1
+void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize);
+ else {
+ // 2040 is presumably the largest multiple of 8 that fits the save
+ // instruction's 11-bit unsigned immediate; a template function like
+ // isUInt that returns the largest possible n-bit unsigned integer
+ // would be cleaner here.
+ int Base = 2040;
+ int64_t Remainder = FrameSize - Base;
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(Base);
+ if (isInt<16>(-Remainder))
+ BuildAddiuSpImm(MBB, I, -Remainder);
+ else
+ adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1);
+ }
+
+ }
+ else {
+ //
+ //
+ // sw ra, -4(sp)
+ // sw s1, -8(sp)
+ // sw s0, -12(sp)
+ //
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::RA);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1);
+ }
+}
+
+// Adjust SP by FrameSize bytes. Restore RA, S0, S1
+void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize);
+ else {
+ // 2040 is presumably the largest multiple of 8 that fits the restore
+ // instruction's 11-bit unsigned immediate (see makeFrame above).
+ int Base = 2040;
+ int64_t Remainder = FrameSize - Base;
+ if (isInt<16>(Remainder))
+ BuildAddiuSpImm(MBB, I, Remainder);
+ else
+ adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(Base);
+ }
+ }
+ else {
+ adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1);
+ // lw ra, -4(sp)
+ // lw s1, -8(sp)
+ // lw s0, -12(sp)
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::A0);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::RA);
+ MIB0.addReg(Mips::A0);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ }
+
+}
+
+// Adjust SP by Amount bytes, where Amount can be up to a 32-bit number.
+// This can only be called at times when we know that there is at least one
+// free register.
+// This is clearly safe at prologue and epilogue.
+//
+void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ //
+ // li reg1, constant
+ // move reg2, sp
+ // add reg1, reg1, reg2
+ // move sp, reg1
+ //
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1);
+ MIB1.addImm(Amount);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2);
+ MIB2.addReg(Mips::SP, RegState::Kill);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1);
+ MIB3.addReg(Reg1);
+ MIB3.addReg(Reg2, RegState::Kill);
+ MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::SP);
+ MIB4.addReg(Reg1, RegState::Kill);
+}
+
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ llvm_unreachable("adjust stack pointer amount exceeded");
+}
+
/// Adjust SP by Amount bytes.
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- if (isInt<16>(Amount)) {
- if (Amount < 0)
- BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)). addImm(-Amount);
- else if (Amount > 0)
- BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount);
+ if (isInt<16>(Amount)) // Amount fits addiu sp, imm
+ BuildAddiuSpImm(MBB, I, Amount);
+ else
+ adjustStackPtrBigUnrestricted(SP, Amount, MBB, I);
+}
+
+/// This function generates the sequence of instructions needed to get the
+/// result of adding register REG and immediate IMM.
+unsigned
+Mips16InstrInfo::loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const {
+ //
+ // given original instruction is:
+ // Instr rx, T[offset] where offset is too big.
+ //
+ // lo = offset & 0xFFFF
+ // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
+ //
+ // let T = temporary register
+ // li T, hi
+ // shl T, 16
+ // add T, Rx, T
+ //
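+ // Worked example: Imm = 0x12348000 gives lo = 0x8000, lo >> 15 = 1, and
+ // hi = (0x1234 + 1) & 0xFFFF = 0x1235; the later use sign-extends lo to
+ // -0x8000, and (0x1235 << 16) - 0x8000 == 0x12348000 as required.
+ //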
+ RegScavenger rs;
+ int32_t lo = Imm & 0xFFFF;
+ int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF;
+ NewImm = lo;
+ unsigned Reg = 0;
+ unsigned SpReg = 0;
+ rs.enterBasicBlock(&MBB);
+ rs.forward(II);
+ //
+ // We use T0 as the save location for the first register and T1 for the
+ // second, if we need to save anything away.
+ //
+ unsigned FirstRegSaved = 0, SecondRegSaved = 0;
+ unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;
+
+ Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (Reg == 0) {
+ FirstRegSaved = Reg = Mips::V0;
+ FirstRegSavedTo = Mips::T0;
+ copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
}
else
- // not implemented for large values yet
- assert(false && "adjust stack pointer amount exceeded");
+ rs.setUsed(Reg);
+ BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi);
+ BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg).
+ addImm(16);
+ if (FrameReg == Mips::SP) {
+ SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (SpReg == 0) {
+ if (Reg != Mips::V1) {
+ SecondRegSaved = SpReg = Mips::V1;
+ SecondRegSavedTo = Mips::T1;
+ }
+ else {
+ SecondRegSaved = SpReg = Mips::V0;
+ SecondRegSavedTo = Mips::T0;
+ }
+ copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+ }
+ else
+ rs.setUsed(SpReg);
+
+ copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
+ BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg).addReg(SpReg)
+ .addReg(Reg);
+ }
+ else
+ BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+ if (FirstRegSaved || SecondRegSaved) {
+ II = llvm::next(II);
+ if (FirstRegSaved)
+ copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
+ if (SecondRegSaved)
+ copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
+ }
+ return Reg;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
@@ -194,6 +398,20 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
}
+
+const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
+ if (validSpImm8(Imm))
+ return get(Mips::AddiuSpImm16);
+ else
+ return get(Mips::AddiuSpImmX16);
+}
+
+void Mips16InstrInfo::BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
+}
+
const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
return new Mips16InstrInfo(TM);
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e06ccfe61c52..a77a9043bb17 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -14,8 +14,8 @@
#ifndef MIPS16INSTRUCTIONINFO_H
#define MIPS16INSTRUCTIONINFO_H
-#include "MipsInstrInfo.h"
#include "Mips16RegisterInfo.h"
+#include "MipsInstrInfo.h"
namespace llvm {
@@ -48,31 +48,75 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+ // Adjust SP by FrameSize bytes. Save RA, S0, S1
+ void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Adjust SP by FrameSize bytes. Restore RA, S0, S1
+ void restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ /// Emit a series of instructions to load an immediate.
+ /// This is used to adjust some FrameReg; we return the new register to
+ /// be used in place of FrameReg and the adjusted immediate field
+ /// (&NewImm).
+ unsigned loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const;
+
+ static bool validSpImm8(int offset) {
+ return ((offset & 7) == 0) && isInt<11>(offset);
+ }
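+ // For illustration: validSpImm8 accepts multiples of 8 in the signed
+ // 11-bit range, i.e. -1024 .. 1016; anything else must use the extended
+ // AddiuSpImmX16 form chosen by AddiuSpImm below.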
+
+ //
+ // Build the proper AddiuSp instruction variant based on the Imm field.
+ //
+
+ const MCInstrDesc& AddiuSpImm(int64_t Imm) const;
+
+ void BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+
private:
virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc) const;
+
+ // Adjust SP by Amount bytes, where Amount can be a full 32-bit value.
+ void adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const;
+
+ // Adjust SP by Amount bytes, where Amount can be a full 32-bit value.
+ void adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
};
}
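
Declared alongside the immediate helpers, adjustStackPtrBig covers frame sizes that no addiu-sp immediate can reach. A rough sketch of the expansion it performs, printed as text rather than emitted as MachineInstrs; the scratch register names are illustrative and the exact sequence is an assumption based on the helper's signature:

#include <cstdint>
#include <cstdio>

// Assumed shape: materialize the 32-bit Amount into Reg1, copy SP into
// Reg2, add them, and copy the sum back into SP.
static void adjustStackPtrBig(int64_t Amount, const char *Reg1,
                              const char *Reg2) {
  std::printf("lw\t%s, <constant %lld>\n", Reg1, (long long)Amount);
  std::printf("move\t%s, $sp\n", Reg2);
  std::printf("addu\t%s, %s, %s\n", Reg1, Reg1, Reg2);
  std::printf("move\t$sp, %s\n", Reg1);
}

int main() {
  adjustStackPtrBig(-(INT64_C(1) << 20), "$v0", "$v1");
  return 0;
}
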
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 5defc75ea6ef..aa51aaf46565 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
@@ -32,18 +32,76 @@ def mem16_ea : Operand<i32> {
}
//
+//
+// I8 instruction format
+//
+
+class FI816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+
+class FI816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
+
+//
+// RI instruction format
+//
+
+
+class FRI16_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16R_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16R_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class F2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+class FRI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>;
+//
// Compare a register and immediate and place result in CC
// Implicit use of T8
//
// EXT-CCRR Instruction format
//
-class FEXT_CCRXI16_ins<bits<5> _op, string asmstr,
- InstrItinClass itin>:
- FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
- !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> {
+class FEXT_CCRXI16_ins<string asmstr>:
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
+// JAL and JALX instruction format
+//
+class FJAL16_ins<bits<1> _X, string asmstr,
+ InstrItinClass itin>:
+ FJAL16<_X, (outs), (ins simm20:$imm),
+ !strconcat(asmstr, "\t$imm\n\tnop"),[],
+ itin> {
+ let isCodeGenOnly=1;
+}
//
// EXT-I instruction format
//
@@ -57,13 +115,17 @@ class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
class FEXT_I816_ins_base<bits<3> _func, string asmstr,
string asmstr2, InstrItinClass itin>:
- FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2),
+ FEXT_I816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
[], itin>;
class FEXT_I816_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+class FEXT_I816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$$sp, $imm", itin>;
+
//
// Assembler formats in alphabetical order.
// Natural and pseudos are mixed together.
@@ -73,10 +135,11 @@ class FEXT_I816_ins<bits<3> _func, string asmstr,
//
// CC-RR Instruction format
//
-class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> {
+class FCCRR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -92,6 +155,15 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+class FEXT_RI16R_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16R_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
@@ -149,25 +221,25 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
//
// EXT-T8I8
//
-class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
- !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
- !strconcat(asmstr, "\t$imm"))),[], itin> {
+class FEXT_T8I816_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[]> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
// EXT-T8I8I
//
-class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
- !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
- !strconcat(asmstr, "\t$targ"))), [], itin> {
+class FEXT_T8I8I16_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -215,9 +287,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
!strconcat(asmstr, "\t$rx, $ry"), [], itin> {
}
-class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ;
+class FRR16R_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRRTR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ;
//
// maybe refactor but need a $zero as a dummy first parameter
@@ -253,7 +330,7 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
string asmstr, InstrItinClass itin>:
- FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+ FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
!strconcat(asmstr, "\t $rx"), [], itin> ;
//
@@ -292,13 +369,13 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
//
// So this pseudo class only has one operand, i.e. op
//
-class Sel<bits<5> f1, string op, InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rt),
- !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin,
- Pseudo16> {
- let isCodeGenOnly=1;
+class Sel<string op>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rt),
+ !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> {
+ //let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -316,16 +393,15 @@ class Sel<bits<5> f1, string op, InstrItinClass itin>:
// move $rd, $rs
//
//
-class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rl, simm16:$imm),
- !strconcat(op2,
- !strconcat("\t$rl, $imm\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+class SeliT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rl, simm16:$imm),
+ !strconcat(op2,
+ !strconcat("\t$rl, $imm\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -340,18 +416,30 @@ class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
// move $rd, $rs
//
//
-class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+class SelT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_),
+ (ins CPU16Regs:$rd, CPU16Regs:$rs,
CPU16Regs:$rl, CPU16Regs:$rr),
- !strconcat(op2,
- !strconcat("\t$rl, $rr\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+ !strconcat(op2,
+ !strconcat("\t$rl, $rr\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
+//
+// 32 bit constant
+//
+def imm32: Operand<i32>;
+
+def Constant32:
+ MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
+
+def LwConstant32:
+ MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm),
+ "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
+
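
The LwConstant32 pseudo just defined avoids a constant pool entirely: it embeds the 32-bit literal in the instruction stream and branches over it, exactly as its asm string spells out. A standalone printer for that shape (register name and value are arbitrary):

#include <cstdint>
#include <cstdio>

static void emitLwConstant32(const char *Rx, uint32_t Imm) {
  std::printf("lw\t%s, 1f\n", Rx);     // load the word at local label 1
  std::printf("b\t2f\n");              // branch over the embedded literal
  std::printf("\t.align\t2\n");
  std::printf("1:\t.word\t%u\n", (unsigned)Imm); // the literal itself
  std::printf("2:\n");
}

int main() {
  emitLwConstant32("$2", 0x12345678u);
  return 0;
}
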
//
// Some general instruction class info
@@ -385,14 +473,21 @@ class MayStore {
}
//
+
// Format: ADDIU rx, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
// To add a constant to a 32-bit integer.
//
def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0> {
+ let AddedComplexity = 5;
+}
def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
- ArithLogic16Defs<0>;
+ ArithLogic16Defs<0> {
+ let isCodeGenOnly = 1;
+}
def AddiuRxRyOffMemX16:
FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
@@ -404,6 +499,25 @@ def AddiuRxRyOffMemX16:
// To add a constant to the program counter.
//
def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+
+//
+// Format: ADDIU sp, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended)
+// To add a constant to the stack pointer.
+//
+def AddiuSpImm16
+ : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+ let AddedComplexity = 5;
+}
+
+def AddiuSpImmX16
+ : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+}
+
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
@@ -422,6 +536,14 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
// Purpose: Branch on Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -435,6 +557,13 @@ def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
//
// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -445,20 +574,22 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
-def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16;
-def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">,
cbranch16;
-def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16;
-def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16;
-def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16;
-def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
cbranch16;
//
@@ -466,22 +597,52 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
// Purpose: Branch on T Not Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16;
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
-def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16;
-def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16;
-def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16;
-def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16;
-def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16;
-def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
cbranch16;
//
+// Format: CMP rx, ry MIPS16e
+// Purpose: Compare
+// To compare the contents of two GPRs.
+//
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate (Extended)
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+
+//
// Format: DIV rx, ry MIPS16e
// Purpose: Divide Word
// To divide 32-bit signed integers.
@@ -498,7 +659,19 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
let Defs = [HI, LO];
}
+//
+// Format: JAL target MIPS16e
+// Purpose: Jump and Link
+// To execute a procedure call within the current 256 MB-aligned
+// region and preserve the current ISA.
+//
+def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+ let isBranch = 1;
+ let hasDelaySlot = 0; // not true, but we add the nop for now
+ let isTerminator=1;
+ let isBarrier=1;
+}
//
// Format: JR ra MIPS16e
@@ -515,7 +688,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
let isBarrier=1;
}
-def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> {
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
@@ -533,7 +706,9 @@ def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
// Purpose: Load Byte (Extended)
// To load a byte from memory as a signed value.
//
-def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
//
// Format: LBU ry, offset(rx) MIPS16e
@@ -541,14 +716,18 @@ def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
// To load a byte from memory as an unsigned value.
//
def LbuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
//
// Format: LH ry, offset(rx) MIPS16e
// Purpose: Load Halfword signed (Extended)
// To load a halfword from memory as a signed value.
//
-def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
//
// Format: LHU ry, offset(rx) MIPS16e
@@ -556,7 +735,16 @@ def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
// To load a halfword from memory as an unsigned value.
//
def LhuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate
+// To load a constant into a GPR.
+//
+def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
//
// Format: LI rx, immediate MIPS16e
@@ -570,13 +758,17 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
// Format: LW rx, offset(sp) MIPS16e
// Purpose: Load Word (SP-Relative, Extended)
// To load an SP-relative word from memory as a signed value.
//
-def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad;
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad {
+ let Uses = [SP];
+}
//
// Format: MOVE r32, rz MIPS16e
@@ -688,6 +880,8 @@ def RestoreRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [S0, S1, RA, SP];
+ let Uses = [SP];
}
// Use Restore to increment SP since SP is not a Mips16 register; this
@@ -698,6 +892,8 @@ def RestoreIncSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [SP];
+ let Uses = [SP];
}
//
@@ -712,6 +908,8 @@ def SaveRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [RA, SP, S0, S1];
+ let Defs = [SP];
}
//
@@ -723,6 +921,8 @@ def SaveDecSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [SP];
+ let Defs = [SP];
}
//
// Format: SB ry, offset(rx) MIPS16e
@@ -741,7 +941,7 @@ def SbRxRyOffMemX16:
// Purpose: if rt==0, do nothing
// else rd = rs
//
-def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
+def SelBeqZ: Sel<"beqz">;
//
// Format: SelTBteqZCmp rd, rs, rl, rr
@@ -749,7 +949,7 @@ def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
+def SelTBteqZCmp: SelT<"bteqz", "cmp">;
//
// Format: SelTBteqZCmpi rd, rs, rl, rr
@@ -757,7 +957,7 @@ def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
+def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">;
//
// Format: SelTBteqZSlt rd, rs, rl, rr
@@ -765,7 +965,7 @@ def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
+def SelTBteqZSlt: SelT<"bteqz", "slt">;
//
// Format: SelTBteqZSlti rd, rs, rl, rr
@@ -773,7 +973,7 @@ def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
+def SelTBteqZSlti: SeliT<"bteqz", "slti">;
//
// Format: SelTBteqZSltu rd, rs, rl, rr
@@ -781,7 +981,7 @@ def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
+def SelTBteqZSltu: SelT<"bteqz", "sltu">;
//
// Format: SelTBteqZSltiu rd, rs, rl, rr
@@ -789,14 +989,14 @@ def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>;
+def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">;
//
// Format: SelBnez rd, rs, rt
// Purpose: if rt!=0, do nothing
// else rd = rs
//
-def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
+def SelBneZ: Sel<"bnez">;
//
// Format: SelTBtneZCmp rd, rs, rl, rr
@@ -804,7 +1004,7 @@ def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
+def SelTBtneZCmp: SelT<"btnez", "cmp">;
//
// Format: SelTBtnezCmpi rd, rs, rl, rr
@@ -812,7 +1012,7 @@ def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
+def SelTBtneZCmpi: SeliT<"btnez", "cmpi">;
//
// Format: SelTBtneZSlt rd, rs, rl, rr
@@ -820,7 +1020,7 @@ def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
+def SelTBtneZSlt: SelT<"btnez", "slt">;
//
// Format: SelTBtneZSlti rd, rs, rl, rr
@@ -828,7 +1028,7 @@ def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
+def SelTBtneZSlti: SeliT<"btnez", "slti">;
//
// Format: SelTBtneZSltu rd, rs, rl, rr
@@ -836,7 +1036,7 @@ def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
+def SelTBtneZSltu: SelT<"btnez", "sltu">;
//
// Format: SelTBtneZSltiu rd, rs, rl, rr
@@ -844,7 +1044,7 @@ def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>;
+def SelTBtneZSltiu: SeliT<"btnez", "sltiu">;
//
//
// Format: SH ry, offset(rx) MIPS16e
@@ -868,39 +1068,78 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
//
def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
//
// Format: SLTI rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>;
+//
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
+
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
//
// Format: SLTIU rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate Unsigned (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>;
+def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
//
// Format: SLT rx, ry MIPS16e
// Purpose: Set on Less Than
// To record the result of a less-than comparison.
//
-def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>;
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu> {
+ let Defs = [T8];
+}
-def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>;
+def SltCCRxRy16: FCCRR16_ins<"slt">;
// Format: SLTU rx, ry MIPS16e
// Purpose: Set on Less Than Unsigned
// To record the result of an unsigned less-than comparison.
//
-def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> {
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu> {
+ let Defs = [T8];
+}
+
+def SltuRxRyRz16: FRRTR16_ins<"sltu"> {
let isCodeGenOnly=1;
+ let Defs = [T8];
}
-def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>;
+def SltuCCRxRy16: FCCRR16_ins<"sltu">;
//
// Format: SRAV ry, rx MIPS16e
// Purpose: Shift Word Right Arithmetic Variable
@@ -996,6 +1235,7 @@ class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
(I CPU16Regs:$in, imm_type:$imm)>;
+def: ArithLogicI16_pat<add, immSExt8, AddiuRxRxImm16>;
def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
@@ -1029,14 +1269,19 @@ def: StoreM16_pat<store, SwRxRyOffMemX16>;
// Unconditional branch
class UncondBranch16_pat<SDNode OpNode, Instruction I>:
Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
- let Predicates = [RelocPIC, InMips16Mode];
+ let Predicates = [InMips16Mode];
}
+def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (Jal16 tglobaladdr:$dst)>;
+
+def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (Jal16 texternalsym:$dst)>;
+
// Indirect branch
def: Mips16Pat<
- (brind CPU16Regs:$rs),
- (JrcRx16 CPU16Regs:$rs)>;
-
+ (brind CPU16Regs:$rs),
+ (JrcRx16 CPU16Regs:$rs)>;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=0 in
@@ -1221,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
// MipsDivRem
//
def: Mips16Pat
- <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
//
// MipsDivRemU
//
def: Mips16Pat
- <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
// signed a,b
@@ -1464,7 +1709,7 @@ def: Mips16Pat
//
def: Mips16Pat
<(setle CPU16Regs:$lhs, CPU16Regs:$rhs),
- (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+ (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>;
//
// setlt
@@ -1524,7 +1769,11 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
// hi/lo relocs
-def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
+def : Mips16Pat<(MipsHi tglobaladdr:$in),
+ (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
+def : Mips16Pat<(MipsHi tjumptable:$in),
+ (SllX16 (LiRxImmX16 tjumptable:$in), 16)>;
+def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
(SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
// wrapper_pic
@@ -1539,4 +1788,4 @@ def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>;
def : Mips16Pat<(i32 (extloadi8 addr16:$src)),
(LbuRxRyOffMemX16 addr16:$src)>;
def : Mips16Pat<(i32 (extloadi16 addr16:$src)),
- (LhuRxRyOffMemX16 addr16:$src)>;
\ No newline at end of file
+ (LhuRxRyOffMemX16 addr16:$src)>;
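
A recurring move in this .td rewrite: formats that expand to multi-instruction sequences (FCCRR16_ins, FEXT_CCRXI16_ins, FEXT_T8I816_ins, Sel, SelT, ...) become MipsPseudo16 defs with usesCustomInserter, so the sequence is built by a custom inserter late in codegen rather than baked into an encoded instruction's asm string. Roughly, the expansion shape for the CC-producing compares looks like this (illustrative only; the real inserter builds MachineInstrs, not text):

#include <cstdio>

// slt/sltu/cmp define the implicit T8 register (see the `let Defs = [T8]`
// lines above); the trailing move copies T8 into the general register the
// instruction selector chose for the condition value.
static void expandCCCompare(const char *CmpOp, const char *Rx,
                            const char *Ry, const char *Cc) {
  std::printf("%s\t%s, %s\n", CmpOp, Rx, Ry);
  std::printf("move\t%s, $t8\n", Cc);
}

int main() {
  expandCCCompare("slt", "$2", "$3", "$4");  // SltCCRxRy16
  expandCCCompare("sltu", "$2", "$3", "$4"); // SltuCCRxRy16
  return 0;
}
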
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index d7397a32f074..6cca2276856d 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,3 +1,4 @@
+
//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
@@ -14,28 +15,30 @@
#include "Mips16RegisterInfo.h"
#include "Mips16InstrInfo.h"
#include "Mips.h"
+#include "Mips16InstrInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -43,25 +46,36 @@ Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
const Mips16InstrInfo &I)
: MipsRegisterInfo(ST), TII(I) {}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void Mips16RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
+bool Mips16RegisterInfo::requiresRegisterScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+bool Mips16RegisterInfo::requiresFrameIndexScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
- const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII);
+bool Mips16RegisterInfo::useFPForScavengingIndex
+ (const MachineFunction &MF) const {
+ return false;
+}
- II->adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
+bool Mips16RegisterInfo::saveScavengerRegister
+ (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ DebugLoc DL;
+ TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
+ TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
+ return true;
+}
- MBB.erase(I);
+const TargetRegisterClass *
+Mips16RegisterInfo::intRegClass(unsigned Size) const {
+ assert(Size == 4);
+ return &Mips::CPU16RegsRegClass;
}
void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
@@ -114,13 +128,23 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// by adding the size of the stack:
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
+ bool IsKill = false;
Offset = SPOffset + (int64_t)StackSize;
Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ if (!MI.isDebugValue() &&
+     ((FrameReg != Mips::SP && !isInt<16>(Offset)) ||
+      (FrameReg == Mips::SP && !isInt<15>(Offset)))) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned NewImm;
+ FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
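
The new eliminateFI path fires only when the folded offset no longer fits the instruction's immediate field, and the patch gives SP-relative references one bit less headroom (isInt<15>) than other frame registers (isInt<16>) before falling back to loadImmediate plus a scavenged register. A small self-check of that threshold logic, with isIntN standing in for llvm::isInt:

#include <cstdint>
#include <cstdio>

template <unsigned N> static bool isIntN(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}

// True when eliminateFI must materialize the offset via loadImmediate.
static bool needsLoadImmediate(bool FrameRegIsSP, int64_t Offset) {
  return FrameRegIsSP ? !isIntN<15>(Offset) : !isIntN<16>(Offset);
}

int main() {
  std::printf("%d\n", needsLoadImmediate(true, 20000));  // 1: beyond 15 bits
  std::printf("%d\n", needsLoadImmediate(false, 20000)); // 0: fits in 16 bits
  std::printf("%d\n", needsLoadImmediate(false, 40000)); // 1: beyond 16 bits
  return 0;
}
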
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 153def20d085..2b3d2b1a4ecb 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -22,11 +22,23 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
const Mips16InstrInfo &TII;
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII);
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const Mips16InstrInfo &TII);
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
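
saveScavengerRegister is overridden here because MIPS16 cannot conveniently spill the scavenged register to an arbitrary stack slot; as the Mips16RegisterInfo.cpp hunk above shows, it parks the value in the MIPS32-only register T0 with two copyPhysReg calls instead. A toy model of that save/use/restore bracket:

#include <cstdio>

struct Regs { int T0 = 0, Scavenged = 7; };

static void scavengeBracket(Regs &R) {
  R.T0 = R.Scavenged; // copyPhysReg(MBB, I, DL, T0, Reg, ...): save
  R.Scavenged = 42;   // the scavenger temporarily repurposes the register
  R.Scavenged = R.T0; // copyPhysReg(MBB, UseMI, DL, Reg, T0, ...): restore
}

int main() {
  Regs R;
  scavengeBracket(R);
  std::printf("restored value: %d\n", R.Scavenged); // prints 7
  return 0;
}
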
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 83322eac8c62..846a8224af35 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -34,192 +34,202 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-// Shifts
-// 64-bit shift instructions.
let DecoderNamespace = "Mips64" in {
-class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt,
- CPU64Regs>;
-
-// Mul, Div
-class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-
-multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+
+multiclass Atomic2Ops64<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
-multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass AtomicCmpSwap64<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
}
-let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding],
+let usesCustomInserter = 1, Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
- defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
- defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">;
- defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">;
- defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">;
- defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">;
- defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">;
- defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">;
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>;
+ defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
}
+def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
+
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16,
- CPU64Regs>;
-def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
- CPU64Regs>, IsAsCheapAsAMove;
-def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
-def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
-def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
-def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
-def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
-def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>;
+def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>;
+def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>,
+ ADDI_FM<0x19>, IsAsCheapAsAMove;
+def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xa>;
+def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xb>;
+def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithOverflowR<0x00, 0x2C, "dadd", IIAlu, CPU64Regs, 1>;
-def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
-def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
-def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
-def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
-def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
-def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
-def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
-def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>;
+def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x2d>;
+def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x2f>;
+def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>;
+def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>;
+def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>;
-def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>;
-def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
-def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
-def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
-def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
-let Pattern = []<dag> in {
- def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
- def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
- def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
-}
+def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>,
+ SRA_FM<0x38, 0>;
+def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>,
+ SRA_FM<0x3a, 0>;
+def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>,
+ SRA_FM<0x3b, 0>;
+def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>;
+def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>;
+def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>;
}
// Rotate Instructions
-let Predicates = [HasMips64r2, HasStandardEncoding],
+let Predicates = [HasMips64r2, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
- def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
+ def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>,
+ SRA_FM<0x3a, 1>;
+ def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>,
+ SRLV_FM<0x16, 1>;
}
let DecoderNamespace = "Mips64" in {
/// Load and Store Instructions
/// aligned
-defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
-defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
-defm LH64 : LoadM64<0x21, "lh", sextloadi16>;
-defm LHu64 : LoadM64<0x25, "lhu", zextloadi16>;
-defm LW64 : LoadM64<0x23, "lw", sextloadi32>;
-defm LWu64 : LoadM64<0x27, "lwu", zextloadi32>;
-defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
-defm SH64 : StoreM64<0x29, "sh", truncstorei16>;
-defm SW64 : StoreM64<0x2b, "sw", truncstorei32>;
-defm LD : LoadM64<0x37, "ld", load>;
-defm SD : StoreM64<0x3f, "sd", store>;
+defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>;
+defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>;
+defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>;
+defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>;
+defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>;
+defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>;
+defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>;
+defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>;
+defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>;
+defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>;
+defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>;
/// load/store left/right
-let isCodeGenOnly = 1 in {
- defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>;
- defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>;
- defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>;
- defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>;
-}
-defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>;
-defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>;
-defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>;
-defm SDR : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
+defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
+defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
+defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
+defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
+
+defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>;
+defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>;
+defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>;
+defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
-def LLD : LLBase<0x34, "lld", CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
+let Predicates = [NotN64, HasStdEnc] in {
+ def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>;
+ def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>;
}
-def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
+
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
+ def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>;
+ def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>;
}
/// Jump and Branch Instructions
-def JR64 : IndirectBranch<CPU64Regs>;
-def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
-def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
-def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
-def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
-def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>;
-def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+def JR64 : IndirectBranch<CPU64Regs>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", seteq, CPU64Regs>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", setne, CPU64Regs>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", setge, CPU64Regs>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", setgt, CPU64Regs>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", setle, CPU64Regs>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>;
}
let DecoderNamespace = "Mips64" in
-def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
-def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall;
+def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM;
+def JALR64Pseudo : JumpLinkRegPseudo<CPU64Regs, JALR64, RA_64>;
+def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
let DecoderNamespace = "Mips64" in {
/// Multiply and Divide Instructions.
-def DMULT : Mult64<0x1c, "dmult", IIImul>;
-def DMULTu : Mult64<0x1d, "dmultu", IIImul>;
-def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
-def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
-
-def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>;
-def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>;
-def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>;
-def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>;
+def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1c>;
+def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1d>;
+def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult,
+ IIImul>;
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu,
+ IIImul>;
+def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem,
+ IIIdiv, 0>;
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU,
+ IIIdiv, 0>;
+
+def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", CPU64Regs, [HI64]>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", CPU64Regs, [LO64]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>;
-def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>;
+def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>;
+def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>;
-def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
+def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>;
+def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>;
/// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
-def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
+def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>;
+def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>;
-def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
-let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
- Requires<[IsN64, HasStandardEncoding]>;
let DecoderNamespace = "Mips64" in {
-def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>;
let Pattern = []<dag> in {
- def DEXTU : ExtBase<2, "dextu", CPU64Regs>;
- def DEXTM : ExtBase<1, "dextm", CPU64Regs>;
+ def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>;
+ def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>;
}
-def DINS : InsBase<7, "dins", CPU64Regs>;
+def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>;
let Pattern = []<dag> in {
- def DINSU : InsBase<6, "dinsu", CPU64Regs>;
- def DINSM : InsBase<5, "dinsm", CPU64Regs>;
+ def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>;
+ def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>;
}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -236,13 +246,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
@@ -293,14 +303,10 @@ defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
- Requires<[IsN64, HasStandardEncoding]>;
-
// truncate
def : MipsPat<(i32 (trunc CPU64Regs:$src)),
(SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
- Requires<[IsN64, HasStandardEncoding]>;
+ Requires<[IsN64, HasStdEnc]>;
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
@@ -314,37 +320,73 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// mflo/hi patterns.
+def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
-
+def : InstAlias<"move $dst, $src",
+ (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
+def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"dadd $rs, $rt, $imm",
+ (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>, Requires<[HasMips64]>;
/// Move between CPU and coprocessor registers
+
let DecoderNamespace = "Mips64" in {
-def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">;
-def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">;
-def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">;
-def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">;
-def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">;
-def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">;
+def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
+def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
+def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
+def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
}
+
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc0 $rt, $rd",
+ (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc0 $rt, $rd",
+ (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
+def : InstAlias<"dmfc2 $rt, $rd",
+ (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc2 $rt, $rd",
+ (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
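
The rewritten MIPS64 aliases map assembler shorthands onto real encodings; the trailing 0/1 on each InstAlias controls whether the asm printer may emit the alias form (1) or it is parse-only (0). For instance, move now expands to a daddu (or or) against $zero. A quick illustration of that expansion, textual form only; the real alias is matched structurally by TableGen:

#include <cstdio>
#include <string>

// move $dst, $src  ==>  daddu $dst, $src, $zero on MIPS64.
static std::string expandMove(const std::string &Dst, const std::string &Src) {
  return "daddu\t" + Dst + ", " + Src + ", $zero";
}

int main() {
  std::printf("%s\n", expandMove("$4", "$5").c_str());
  return 0;
}
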
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index bf2818d61df0..1876cb6ffae4 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,31 +13,33 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "Mips.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -65,19 +67,28 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
- return;
-
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
- MCInst TmpInst0;
- MCInstLowering.Lower(I++, TmpInst0);
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, &*I))
+ continue;
+
+ // The inMips16Mode() test is not permanent: some instructions are currently
+ // marked as pseudo, which would make the test fail for the wrong reason,
+ // but that will be fixed soon. The check is needed here because another
+ // test for this situation is being removed downstream in the call chain.
+ //
+ if (I->isPseudo() && !Subtarget->inMips16Mode())
+ llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
- } while ((I != E) && I->isInsideBundle()); // Delay slot check
+ } while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
//===----------------------------------------------------------------------===//
@@ -139,7 +150,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
if (Mips::CPURegsRegClass.contains(Reg))
break;
- unsigned RegNum = getMipsRegisterNumbering(Reg);
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -154,7 +165,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = getMipsRegisterNumbering(Reg);
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
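The two hunks above replace the Mips-specific getMipsRegisterNumbering() helper with the generic TargetRegisterInfo::getEncodingValue() query. A minimal standalone sketch of the bitmask computation (hypothetical helper; the real code obtains each RegNum from TM.getRegisterInfo()->getEncodingValue(Reg)):

    #include <cstdint>
    #include <vector>

    // One bit per callee-saved register, indexed by its target encoding value.
    uint32_t computeSavedRegsBitmask(const std::vector<unsigned> &EncValues) {
      uint32_t Mask = 0;
      for (unsigned RegNum : EncValues)
        Mask |= (1u << RegNum); // same accumulation as CPUBitmask above
      return Mask;
    }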
@@ -221,6 +232,11 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
// OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
}
+
+ if (Subtarget->inMicroMipsMode())
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -236,10 +252,11 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
raw_svector_ostream OS(Str);
printSavedRegsBitmask(OS);
OutStreamer.EmitRawText(OS.str());
-
- OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ }
}
}
@@ -250,9 +267,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
// always be at the function end, and we can't emit and
// break with BB logic.
if (OutStreamer.hasRawTextSupport()) {
- OutStreamer.EmitRawText(StringRef("\t.set\tat"));
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ }
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
}
}
@@ -540,6 +559,18 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// return to previous section
if (OutStreamer.hasRawTextSupport())
OutStreamer.EmitRawText(StringRef("\t.previous"));
+
+}
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ if (OutStreamer.hasRawTextSupport()) return;
+
+ // Emit Mips ELF register info
+ Subtarget->getMReginfo().emitMipsReginfoSectionCG(
+ OutStreamer, getObjFileLowering(), *Subtarget);
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitELFHeaderFlagsCG(*Subtarget);
}
MachineLocation
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 94d8bfa10569..dbdaf266b75f 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -14,8 +14,8 @@
#ifndef MIPSASMPRINTER_H
#define MIPSASMPRINTER_H
-#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
+#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Compiler.h"
@@ -80,6 +80,7 @@ public:
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 78cf140def60..462def76cc80 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -96,6 +96,12 @@ def RetCC_MipsN : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
]>;
+// In soft-float mode, register A0_64, instead of V1_64, is used to return a
+// long double value.
+def RetCC_F128Soft : CallingConv<[
+ CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
+]>;
+
//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -139,17 +145,6 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// Mips Android Calling Convention
-//===----------------------------------------------------------------------===//
-
-def RetCC_MipsAndroid : CallingConv<[
- // f32 are returned in registers F0, F2, F1, F3
- CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
-
- CCDelegateTo<RetCC_MipsO32>
-]>;
-
-//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
@@ -209,7 +204,6 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
- CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 05090b84dece..1d86d903c12e 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -14,22 +14,23 @@
#define DEBUG_TYPE "jit"
#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -62,67 +63,77 @@ class MipsCodeEmitter : public MachineFunctionPass {
static char ID;
- public:
- MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
- MachineFunctionPass(ID), JTI(0),
- II((const MipsInstrInfo *) tm.getInstrInfo()),
- TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
- }
+public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()),
+ TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Mips Machine Code Emitter";
- }
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
- void emitInstruction(const MachineInstr &MI);
+private:
- private:
+ void emitWord(unsigned Word);
- void emitWord(unsigned Word);
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getBranchTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
- void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
- int Offset) const;
- };
+ /// Expand pseudo instructions with accumulator register operands.
+ void expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB, unsigned Opc) const;
+
+ /// \brief Expand pseudo instruction. Return true if MI was expanded.
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const;
+};
}
char MipsCodeEmitter::ID = 0;
bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
- II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
- TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout();
+ MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
Subtarget = &TM.getSubtarget<MipsSubtarget> ();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -139,8 +150,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E; ++I)
- emitInstruction(*I);
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
}
} while (MCE.finishFunction(MF));
@@ -209,7 +220,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getMipsRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
@@ -265,19 +276,21 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
Reloc, BB));
}
-void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- // Skip pseudo instructions.
- if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ // Expand pseudo instructions. Skip emission if MI is a pseudo that could
+ // not be expanded.
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
+ !expandPseudos(MI, MBB))
return;
- emitWord(getBinaryCodeForInstr(MI));
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ emitWord(getBinaryCodeForInstr(*MI));
++NumEmitted; // Keep track of the # of mi's emitted
- MCE.processDebugLoc(MI.getDebugLoc(), false);
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
}
void MipsCodeEmitter::emitWord(unsigned Word) {
@@ -289,6 +302,57 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
MCE.emitWordBE(Word);
}
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB,
+ unsigned Opc) const {
+ // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
+ .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
+}
+
+bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const {
+ switch (MI->getOpcode()) {
+ case Mips::NOP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
+ .addReg(Mips::ZERO).addImm(0);
+ break;
+ case Mips::JALRPseudo:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+ case Mips::PseudoMULT:
+ expandACCInstr(MI, MBB, Mips::MULT);
+ break;
+ case Mips::PseudoMULTu:
+ expandACCInstr(MI, MBB, Mips::MULTu);
+ break;
+ case Mips::PseudoSDIV:
+ expandACCInstr(MI, MBB, Mips::SDIV);
+ break;
+ case Mips::PseudoUDIV:
+ expandACCInstr(MI, MBB, Mips::UDIV);
+ break;
+ case Mips::PseudoMADD:
+ expandACCInstr(MI, MBB, Mips::MADD);
+ break;
+ case Mips::PseudoMADDU:
+ expandACCInstr(MI, MBB, Mips::MADDU);
+ break;
+ case Mips::PseudoMSUB:
+ expandACCInstr(MI, MBB, Mips::MSUB);
+ break;
+ case Mips::PseudoMSUBU:
+ expandACCInstr(MI, MBB, Mips::MSUBU);
+ break;
+ default:
+ return false;
+ }
+
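+ // Post-decrement before erasing: MI steps back to the expansion built just
+ // before it, so the caller's iterator remains valid once the pseudo is
+ // removed from the bundle.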
+ (MI--)->eraseFromBundle();
+ return true;
+}
+
/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
/// code to the specified MCE object.
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index b12b1f2b5ad4..42e4c99f05d6 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -16,42 +16,37 @@
// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
// conditional move instructions.
// cond:int, data:int
-class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct,
- string instr_asm> :
- FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
- let shamt = 0;
+class CMov_I_I_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $rd";
}
// cond:int, data:float
-class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt,
- bits<6> func, string instr_asm> :
- FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> {
- bits<5> rt;
- let ft = rt;
+class CMov_I_F_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $fd";
}
// cond:float, data:int
-class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf,
- string instr_asm> :
- FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"),
- [(set RC:$rd, (cmov RC:$rs, RC:$F))]> {
- let cc = 0;
+class CMov_F_I_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $rd";
}
// cond:float, data:float
-class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
- string instr_asm> :
- FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"),
- [(set RC:$fd, (cmov RC:$fs, RC:$F))]> {
- let cc = 0;
+class CMov_F_F_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $fd";
}
@@ -63,21 +58,23 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
Instruction SLTiuOp> {
def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
(MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
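+ // 'setgt x, imm' is equivalent to 'setge x, imm + 1', so it lowers to
+ // SLTi/SLTiu against imm + 1 (immSExt16Plus1 ensures imm + 1 still fits
+ // in the signed 16-bit immediate field).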
+ def : MipsPat<(select (i32 (setgt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, (Plus1 imm:$rhs)), DRC:$F)>;
+ def : MipsPat<(select (i32 (setugt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, (Plus1 imm:$rhs)),
+ DRC:$F)>;
}
multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
@@ -106,88 +103,110 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
}
// Instantiation of instructions.
-def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64, HasStandardEncoding],
+def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
- def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
- def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> {
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
}
-def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
-let Predicates = [HasMips64, HasStandardEncoding],
+def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
- def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
- def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> {
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
}
-def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
-def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>;
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
-def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>;
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
- def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
- def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
- def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17> {
let isCodeGenOnly = 1;
}
- def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">;
- def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> {
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17> {
let isCodeGenOnly = 1;
}
}
-def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
-def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>;
+def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>,
+ CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
-def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>;
+def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>,
+ CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
-def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<16, 1>;
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<16, 0>;
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
- def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
// Instantiation of conditional move patterns.
defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
SLTiu64>;
@@ -202,7 +221,7 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
}
defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
@@ -211,19 +230,19 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
SLTiu64>;
defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
}
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
SLTiu64>;
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
new file mode 100644
index 000000000000..b5de1ebad22b
--- /dev/null
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -0,0 +1,85 @@
+//===-- MipsConstantIslandPass.cpp - Emit PC-relative loads ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// This pass emits PC-relative loads of constants.
+// For now, only Mips16 uses it. While it shares its name with, and borrows
+// many ideas from, the LLVM ARM Constant Island Pass, it is not intended to
+// reuse any of that pass's code.
+//
+// Loading constants inline is expensive on Mips16, so it is generally better
+// to place a constant nearby in code space, where it can be loaded with a
+// simple 16-bit load instruction.
+//
+// The constants are not just numbers; they can also be addresses of functions
+// and labels. This is particularly helpful in static relocation mode for
+// embedded non-Linux targets.
+//
+//
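+// For example (illustrative Mips16-style assembly only):
+//
+//   lw $2, $CPI0_0   # single 16-bit PC-relative load of a nearby constant
+//
+// instead of materializing the constant with a longer instruction sequence.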
+
+#define DEBUG_TYPE "mips-constant-islands"
+
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ class MipsConstantIslands : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsConstantIslands(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Constant Islands";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ bool IsPIC;
+ unsigned ABI;
+
+ };
+
+ char MipsConstantIslands::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsConstantIslandPass - Returns a pass that emits constant islands
+/// for Mips16 PC-relative loads of constants.
+FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
+ return new MipsConstantIslands(tm);
+}
+
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+ return true;
+}
+
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 8e01d06596a1..a72a763fde06 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -24,8 +24,9 @@ class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
let Predicates = [HasDSP];
}
-class PseudoDSP<dag outs, dag ins, list<dag> pattern>:
- MipsPseudo<outs, ins, "", pattern> {
+class PseudoDSP<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
let Predicates = [HasDSP];
}
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index ef9402865b0d..3c116e1264b3 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -20,17 +20,18 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
// Mips-specific dsp nodes
-def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
-def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, untyped>]>;
+def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
class MipsDSPBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof>;
class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>;
def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
@@ -40,7 +41,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
-def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>;
def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
@@ -75,10 +76,6 @@ def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
// Flags.
-class IsCommutable {
- bit isCommutable = 1;
-}
-
class UseAC {
list<Register> Uses = [AC0];
}
@@ -387,7 +384,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
@@ -396,46 +393,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
}
-class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
- PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R1_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins simm16:$shift);
+class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
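+ // The "$acin = $ac" constraint ties the input accumulator to the output,
+ // so the register allocator assigns both the same register, matching the
+ // instruction's read-modify-write behavior.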
+ string Constraints = "$acin = $ac";
}
-class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R2_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class MTHLIP_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -458,39 +449,41 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<Register> Defs = [DSPCtrl];
}
-class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
+class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class DPA_W_PH_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
+class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
-}
-
-class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
+ list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))];
InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ bit isCommutable = 1;
}
-class MULT_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ string Constraints = "$acin = $ac";
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
- MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> {
+ MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
list<Register> Uses = [DSPCtrl];
bit usesCustomInserter = 1;
}
@@ -721,44 +714,40 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
NoItinerary, DSPRegs, DSPRegs>,
IsCommutable;
-class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
+ MipsMULSAQ_S_W_PH>;
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
// Dot product with accumulate/subtract
-class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
-
-class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
-class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>;
-class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
-class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
-class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
-
-class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
-
-class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
-
-class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
-
-class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
+class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>;
+class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>;
+class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>;
+class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
@@ -871,11 +860,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
NoItinerary>;
-class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
-class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
@@ -979,23 +968,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
IsCommutable;
// Dot product with accumulate/subtract
-class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
-class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
-class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
+ MipsDPAQX_SA_W_PH>;
-class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
-class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
-class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
+ MipsDPSQX_SA_W_PH>;
-class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
// Precision reduce/expand
class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
@@ -1210,71 +1201,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
- MULSAQ_S_W_PH>;
-def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
- MAQ_S_W_PHL>;
-def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
- MAQ_S_W_PHR>;
-def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
- MAQ_SA_W_PHL>;
-def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
- MAQ_SA_W_PHR>;
-def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
- DPAU_H_QBL>;
-def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
- DPAU_H_QBR>;
-def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
- DPSU_H_QBL>;
-def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
- DPSU_H_QBR>;
-def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
- DPAQ_S_W_PH>;
-def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
- DPSQ_S_W_PH>;
-def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
- DPAQ_SA_L_W>;
-def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
- DPSQ_SA_L_W>;
-
-def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
- IsCommutable;
-def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
- IsCommutable;
-def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
- IsCommutable, UseAC;
-def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
- IsCommutable, UseAC;
-def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
- UseAC;
-def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
- UseAC;
-
-def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
-def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
-def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
-
-let Predicates = [HasDSPR2] in {
-
-def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
-def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
-def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
- DPAQX_S_W_PH>;
-def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
- DPAQX_SA_W_PH>;
-def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
- DPAX_W_PH>;
-def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
- DPSX_W_PH>;
-def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
- DPSQX_S_W_PH>;
-def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
- DPSQX_SA_W_PH>;
-def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
- MULSA_W_PH>;
-
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>;
+ defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
}
+def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
Pat<pattern, result>, Requires<[pred]>;
@@ -1300,10 +1234,12 @@ def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+ DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, CPURegs:$rs)>;
class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1317,3 +1253,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
+
+// mflo/hi patterns.
+let AddedComplexity = 20 in
+def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
+
+// Indexed load patterns.
+class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
+ DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
+ (Instr i32:$base, i32:$index)>;
+
+let AddedComplexity = 20 in {
+ def : IndexedLoadPat<zextloadi8, LBUX>;
+ def : IndexedLoadPat<sextloadi16, LHX>;
+ def : IndexedLoadPat<load, LWX>;
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3c8ed75cf91..d07a595af38a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,4 +1,4 @@
-//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===//
+//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,28 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with useful instructions.
+// Simple pass to fill delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "delay-slot-filler"
#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -33,27 +39,143 @@ STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
static cl::opt<bool> DisableDelaySlotFiller(
"disable-mips-delay-filler",
cl::init(false),
- cl::desc("Disable the delay slot filler, which attempts to fill the Mips"
- "delay slots with useful instructions."),
+ cl::desc("Fill all delay slots with NOPs."),
cl::Hidden);
-// This option can be used to silence complaints by machine verifier passes.
-static cl::opt<bool> SkipDelaySlotFiller(
- "skip-mips-delay-filler",
+static cl::opt<bool> DisableForwardSearch(
+ "disable-mips-df-forward-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search forward."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableSuccBBSearch(
+ "disable-mips-df-succbb-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableBackwardSearch(
+ "disable-mips-df-backward-search",
cl::init(false),
- cl::desc("Skip MIPS' delay slot filling pass."),
+ cl::desc("Disallow MIPS delay filler to search backward."),
cl::Hidden);
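+// Usage sketch (hidden cl::opt flags, reachable through llc as usual):
+//   llc -disable-mips-df-forward-search=false ...  # enable forward search
+// Forward and successor-block search default to disabled (cl::init(true));
+// backward search is enabled by default and is turned off with
+// -disable-mips-df-backward-search.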
namespace {
- struct Filler : public MachineFunctionPass {
- typedef MachineBasicBlock::instr_iterator InstrIter;
- typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
+
+ /// \brief A functor comparing edge weight of two blocks.
+ struct CmpWeight {
+ CmpWeight(const MachineBasicBlock &S,
+ const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {}
+
+ bool operator()(const MachineBasicBlock *Dst0,
+ const MachineBasicBlock *Dst1) const {
+ return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1);
+ }
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- InstrIter LastFiller;
+ const MachineBasicBlock &Src;
+ const MachineBranchProbabilityInfo &Prob;
+ };
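+ // Presumably used with std::max_element over a block's successors to pick
+ // the successor with the highest edge weight.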
- static char ID;
+ class RegDefsUses {
+ public:
+ RegDefsUses(TargetMachine &TM);
+ void init(const MachineInstr &MI);
+
+ /// This function sets all caller-saved registers in Defs.
+ void setCallerSaved(const MachineInstr &MI);
+
+ /// This function sets all unallocatable registers in Defs.
+ void setUnallocatableRegs(const MachineFunction &MF);
+
+ /// Set bits in Uses corresponding to MBB's live-out registers except for
+ /// the registers that are live-in to SuccBB.
+ void addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB);
+
+ bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
+
+ private:
+ bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
+ bool IsDef) const;
+
+ /// Returns true if Reg or any of its aliases is in RegSet.
+ bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
+
+ const TargetRegisterInfo &TRI;
+ BitVector Defs, Uses;
+ };
+
+ /// Base class for inspecting loads and stores.
+ class InspectMemInstr {
+ public:
+ InspectMemInstr(bool ForbidMemInstr_)
+ : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
+ SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+
+ /// Return true if MI cannot be moved to delay slot.
+ bool hasHazard(const MachineInstr &MI);
+
+ virtual ~InspectMemInstr() {}
+
+ protected:
+ /// Flags indicating whether loads or stores have been seen.
+ bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+
+ /// Memory instructions are not allowed to be moved into the delay slot if
+ /// this flag is true.
+ bool ForbidMemInstr;
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) = 0;
+ };
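+ // The subclasses below follow a template-method pattern: hasHazard() does
+ // the common load/store bookkeeping and defers the final decision to the
+ // subclass hook hasHazard_().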
+
+ /// This subclass rejects any memory instructions.
+ class NoMemInstr : public InspectMemInstr {
+ public:
+ NoMemInstr() : InspectMemInstr(true) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ };
+
+ /// This subclass accepts loads from stacks and constant loads.
+ class LoadFromStackOrConst : public InspectMemInstr {
+ public:
+ LoadFromStackOrConst() : InspectMemInstr(false) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+ };
+
+ /// This subclass uses memory dependence information to determine whether a
+ /// memory instruction can be moved to a delay slot.
+ class MemDefsUses : public InspectMemInstr {
+ public:
+ MemDefsUses(const MachineFrameInfo *MFI);
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+
+ /// Update Defs and Uses. Return true if there exist dependences between V
+ /// and the values in Uses and Defs that disqualify the delay slot
+ /// candidate.
+ bool updateDefsUses(const Value *V, bool MayStore);
+
+ /// Get the list of underlying objects of MI's memory operand.
+ bool getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const;
+
+ const MachineFrameInfo *MFI;
+ SmallPtrSet<const Value*, 4> Uses, Defs;
+
+ /// Flags indicating whether loads or stores with no underlying objects have
+ /// been seen.
+ bool SeenNoObjLoad, SeenNoObjStore;
+ };
+
+ class Filler : public MachineFunctionPass {
+ public:
Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
@@ -61,11 +183,7 @@ namespace {
return "Mips Delay Slot Filler";
}
- bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) {
- if (SkipDelaySlotFiller)
- return false;
-
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -73,66 +191,334 @@ namespace {
return Changed;
}
- bool isDelayFiller(MachineBasicBlock &MBB,
- InstrIter candidate);
-
- void insertCallUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
-
- void insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
-
- bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
- unsigned Reg);
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- bool delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
+ private:
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
- bool
- findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
- InstrIter &Filler);
+    /// This function checks whether it is safe to move Candidate into the
+    /// delay slot and returns true if it is not. It also updates memory and
+    /// register dependence information.
+ bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const;
+
+    /// This function searches the range [Begin, End) for an instruction that
+    /// can be moved to the delay slot. Returns true on success.
+ template<typename IterTy>
+ bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr &IM,
+ IterTy &Filler) const;
+
+ /// This function searches in the backward direction for an instruction that
+ /// can be moved to the delay slot. Returns true on success.
+ bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches MBB in the forward direction for an instruction
+ /// that can be moved to the delay slot. Returns true on success.
+ bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches one of MBB's successor blocks for an instruction
+ /// that can be moved to the delay slot and inserts clones of the
+ /// instruction into the successor's predecessor blocks.
+ bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
+ /// successor block that is not a landing pad.
+ MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
+
+    /// This function analyzes MBB and returns a branch to Dst whose delay
+    /// slot is unoccupied.
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+ getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
+
+ /// Examine Pred and see if it is possible to insert an instruction into
+    /// the delay slot of one of its branches or at its end.
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const;
+
+ bool terminateSearch(const MachineInstr &Candidate) const;
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ static char ID;
};
char Filler::ID = 0;
} // end of anonymous namespace
+static bool hasUnoccupiedSlot(const MachineInstr *MI) {
+ return MI->hasDelaySlot() && !MI->isBundledWithSucc();
+}
+
+/// This function inserts clones of Filler into predecessor blocks.
+static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
+ MachineFunction *MF = Filler->getParent()->getParent();
+
+ for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
+ if (I->second) {
+ MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ ++UsefulSlots;
+ } else {
+ I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ }
+ }
+}
+
+/// This function adds registers Filler defines to MBB's live-in register list.
+static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
+ for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Filler->getOperand(I);
+ unsigned R;
+
+ if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
+ continue;
+
+#ifndef NDEBUG
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ "Shouldn't move an instruction with unallocatable registers across "
+ "basic block boundaries.");
+#endif
+
+ if (!MBB.isLiveIn(R))
+ MBB.addLiveIn(R);
+ }
+}
+
+RegDefsUses::RegDefsUses(TargetMachine &TM)
+ : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
+ Uses(TRI.getNumRegs(), false) {}
+
+void RegDefsUses::init(const MachineInstr &MI) {
+ // Add all register operands which are explicit and non-variadic.
+ update(MI, 0, MI.getDesc().getNumOperands());
+
+ // If MI is a call, add RA to Defs to prevent users of RA from going into
+  // the delay slot.
+ if (MI.isCall())
+ Defs.set(Mips::RA);
+
+ // Add all implicit register operands of branch instructions except
+ // register AT.
+ if (MI.isBranch()) {
+ update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
+ Defs.reset(Mips::AT);
+ }
+}
+
+void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
+ assert(MI.isCall());
+
+ // If MI is a call, add all caller-saved registers to Defs.
+ BitVector CallerSavedRegs(TRI.getNumRegs(), true);
+
+ CallerSavedRegs.reset(Mips::ZERO);
+ CallerSavedRegs.reset(Mips::ZERO_64);
+
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
+ CallerSavedRegs.reset(*AI);
+
+ Defs |= CallerSavedRegs;
+}
+
+void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
+ BitVector AllocSet = TRI.getAllocatableSet(MF);
+
+ for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AllocSet.set(*AI);
+
+ AllocSet.set(Mips::ZERO);
+ AllocSet.set(Mips::ZERO_64);
+
+ Defs |= AllocSet.flip();
+}
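
A note on the flip() trick above: the routine first closes the allocatable set over register aliases, then marks the two ZERO registers as acceptable, and finally takes the complement, so Defs ends up containing exactly the registers a filler must not touch when moved across a block boundary. A minimal stand-alone model of the same set algebra (fixed 32-register file, aliasing deliberately ignored):

#include <bitset>

// Sketch only: models RegDefsUses::setUnallocatableRegs without the
// MCRegAliasIterator closure over the allocatable set.
static std::bitset<32> unallocatableModel(std::bitset<32> Allocatable,
                                          unsigned ZeroReg) {
  Allocatable.set(ZeroReg); // $zero is safe even though it is unallocatable
  return ~Allocatable;      // the complement is the forbidden set
}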
+
+void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if (*SI != &SuccBB)
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI)
+ Uses.set(*LI);
+}
+
+bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
+ BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
+ bool HasHazard = false;
+
+ for (unsigned I = Begin; I != End; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+
+ if (MO.isReg() && MO.getReg())
+ HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
+ }
+
+ Defs |= NewDefs;
+ Uses |= NewUses;
+
+ return HasHazard;
+}
+
+bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
+ unsigned Reg, bool IsDef) const {
+ if (IsDef) {
+ NewDefs.set(Reg);
+    // Check whether Reg has already been defined or used.
+ return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
+ }
+
+ NewUses.set(Reg);
+  // Check whether Reg has already been defined.
+ return isRegInSet(Defs, Reg);
+}
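
The rule encoded by checkRegDefsUses is the standard delay-slot legality check: walking away from the slot, a candidate must not define a register that an already-scanned instruction defines or uses, and must not use a register that an already-scanned instruction defines. A stand-alone sketch of that rule, using plain std::set in place of the alias-aware BitVector (aliasing deliberately ignored, and the NewDefs/NewUses accumulation omitted):

#include <set>

// Sketch of the register hazard rule from RegDefsUses::checkRegDefsUses.
struct RegHazardModel {
  std::set<unsigned> Defs, Uses; // registers seen between slot and candidate

  bool hazard(unsigned Reg, bool IsDef) const {
    if (IsDef) // a def clashes with any earlier def or use of Reg
      return Defs.count(Reg) || Uses.count(Reg);
    return Defs.count(Reg); // a use clashes only with earlier defs
  }
};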
+
+bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
+  // Check Reg and all aliased registers.
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ if (RegSet.test(*AI))
+ return true;
+ return false;
+}
+
+bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
+ if (!MI.mayStore() && !MI.mayLoad())
+ return false;
+
+ if (ForbidMemInstr)
+ return true;
+
+ OrigSeenLoad = SeenLoad;
+ OrigSeenStore = SeenStore;
+ SeenLoad |= MI.mayLoad();
+ SeenStore |= MI.mayStore();
+
+ // If MI is an ordered or volatile memory reference, disallow moving
+  // subsequent loads and stores to the delay slot.
+ if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
+ ForbidMemInstr = true;
+ return true;
+ }
+
+ return hasHazard_(MI);
+}
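
In other words, once an ordered (volatile or atomic) memory reference is seen after some other load or store, ForbidMemInstr latches and every later memory instruction is rejected. A stand-alone model of just that latch, with the subclass check hasHazard_ stubbed out:

// Sketch of the ordering latch in InspectMemInstr::hasHazard. "IsOrdered"
// stands in for MI.hasOrderedMemoryRef() (volatile/atomic references).
struct MemOrderModel {
  bool SeenLoad, SeenStore, Forbid;
  MemOrderModel() : SeenLoad(false), SeenStore(false), Forbid(false) {}

  bool hazard(bool IsLoad, bool IsStore, bool IsOrdered) {
    if (!IsLoad && !IsStore)
      return false;
    if (Forbid)
      return true;
    bool SeenAny = SeenLoad || SeenStore;   // state before this instruction
    SeenLoad |= IsLoad;
    SeenStore |= IsStore;
    if (IsOrdered && SeenAny) {
      Forbid = true; // no later memory op may enter the slot
      return true;
    }
    return false; // the real code defers to the subclass here
  }
};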
+
+bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
+ if (MI.mayStore())
+ return true;
+
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return true;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ if (isa<FixedStackPseudoSourceValue>(V))
+ return false;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
+ return !PSV->PseudoSourceValue::isConstant(0) &&
+ (V != PseudoSourceValue::getStack());
+
+ return true;
+}
+
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
+
+bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
+ bool HasHazard = false;
+ SmallVector<const Value *, 4> Objs;
+
+ // Check underlying object list.
+ if (getUnderlyingObjects(MI, Objs)) {
+ for (SmallVector<const Value *, 4>::const_iterator I = Objs.begin();
+ I != Objs.end(); ++I)
+ HasHazard |= updateDefsUses(*I, MI.mayStore());
+
+ return HasHazard;
+ }
+
+ // No underlying objects found.
+ HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
+ HasHazard |= MI.mayLoad() || OrigSeenStore;
+
+ SeenNoObjLoad |= MI.mayLoad();
+ SeenNoObjStore |= MI.mayStore();
+
+ return HasHazard;
+}
+
+bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
+ if (MayStore)
+ return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+
+ Uses.insert(V);
+ return Defs.count(V) || SeenNoObjStore;
+}
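
The memory-dependence rule above mirrors the register rule: per underlying object, a candidate store conflicts with any earlier store or load of the same object (and with any memory op whose object is unknown), while a candidate load conflicts only with earlier stores. A stand-alone sketch keyed by an integer object ID instead of a const Value*:

#include <set>

// Sketch of MemDefsUses::updateDefsUses over identified underlying objects.
struct MemDepModel {
  std::set<int> Defs, Uses;           // objects stored to / loaded from
  bool SeenNoObjLoad, SeenNoObjStore; // memory ops with unknown objects
  MemDepModel() : SeenNoObjLoad(false), SeenNoObjStore(false) {}

  bool hazard(int Obj, bool MayStore) {
    if (MayStore) // insert().second is false if Obj was already stored to
      return !Defs.insert(Obj).second || Uses.count(Obj) ||
             SeenNoObjStore || SeenNoObjLoad;
    Uses.insert(Obj);
    return Defs.count(Obj) || SeenNoObjStore;
  }
};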
+
+bool MemDefsUses::
+getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const {
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return false;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value*, 4>::iterator I = Objs.begin(), E = Objs.end();
+ I != E; ++I) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) {
+ if (PSV->isAliased(MFI))
+ return false;
+ } else if (!isIdentifiedObject(V))
+ return false;
+
+ Objects.push_back(*I);
+ }
+
+ return true;
+}
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
-bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- LastFiller = MBB.instr_end();
-
- for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
- if (I->hasDelaySlot()) {
- ++FilledSlots;
- Changed = true;
-
- InstrIter D;
-
- // Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
- findDelayInstr(MBB, I, D)) {
- MBB.splice(llvm::next(I), &MBB, D);
- ++UsefulSlots;
- } else
- BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
-
- // Record the filler instruction that filled the delay slot.
- // The instruction after it will be visited in the next iteration.
- LastFiller = ++I;
-
- // Set InsideBundle bit so that the machine verifier doesn't expect this
- // instruction to be a terminator.
- LastFiller->setIsInsideBundle();
- }
- return Changed;
+ for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!hasUnoccupiedSlot(&*I))
+ continue;
+
+ ++FilledSlots;
+ Changed = true;
+
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
+
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
+ continue;
+ }
+ }
+
+  // Bundle the NOP with the instruction that has the delay slot.
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ MIBundleBuilder(MBB, I, llvm::next(llvm::next(I)));
+ }
+
+ return Changed;
}
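
The loop above tries the three strategies in a fixed order: a backward search within the block first; then, for terminators, the successor-block search; otherwise a forward search past a call. If all fail, a NOP is emitted and bundled so the slot reads as occupied on later passes. Schematically, with hypothetical MIPS assembly (not taken from any test):

// Before filling:              After a successful backward search:
//
//   addiu $16, $16, 1            jr    $ra
//   jr    $ra                    addiu $16, $16, 1   # moved into the slot
//
// If every search fails, a NOP is emitted and bundled instead:
//
//   jr    $ra
//   nop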
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -141,129 +527,195 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
-bool Filler::findDelayInstr(MachineBasicBlock &MBB,
- InstrIter slot,
- InstrIter &Filler) {
- SmallSet<unsigned, 32> RegDefs;
- SmallSet<unsigned, 32> RegUses;
-
- insertDefsUses(slot, RegDefs, RegUses);
-
- bool sawLoad = false;
- bool sawStore = false;
-
- for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
+template<typename IterTy>
+bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr& IM,
+ IterTy &Filler) const {
+ for (IterTy I = Begin; I != End; ++I) {
    // Skip debug values.
if (I->isDebugValue())
continue;
- // Convert to forward iterator.
- InstrIter FI(llvm::next(I).base());
-
- if (I->hasUnmodeledSideEffects()
- || I->isInlineAsm()
- || I->isLabel()
- || FI == LastFiller
- || I->isPseudo()
- //
- // Should not allow:
- // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
- // list. TBD.
- )
+ if (terminateSearch(*I))
break;
- if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
- insertDefsUses(FI, RegDefs, RegUses);
+ assert((!I->isCall() && !I->isReturn() && !I->isBranch()) &&
+ "Cannot put calls, returns or branches in delay slot.");
+
+ if (delayHasHazard(*I, RegDU, IM))
continue;
- }
- Filler = FI;
+ Filler = I;
return true;
}
return false;
}
-bool Filler::delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- if (candidate->isImplicitDef() || candidate->isKill())
- return true;
+bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableBackwardSearch)
+ return false;
- // Loads or stores cannot be moved past a store to the delay slot
- // and stores cannot be moved past a load.
- if (candidate->mayLoad()) {
- if (sawStore)
- return true;
- sawLoad = true;
- }
+ RegDefsUses RegDU(TM);
+ MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ ReverseIter Filler;
- if (candidate->mayStore()) {
- if (sawStore)
- return true;
- sawStore = true;
- if (sawLoad)
- return true;
+ RegDU.init(*Slot);
+
+ if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
- assert((!candidate->isCall() && !candidate->isReturn()) &&
- "Cannot put calls or returns in delay slot.");
+ return false;
+}
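
One subtlety worth noting: Filler here is a reverse iterator, and llvm::next(Filler).base() is the standard way to recover a forward iterator that points at the filler itself before splicing it after the slot. A self-contained demonstration of that base() relationship using std::list:

#include <cassert>
#include <list>

int main() {
  std::list<int> L;
  L.push_back(1); L.push_back(2); L.push_back(3);

  std::list<int>::reverse_iterator RI = L.rbegin();
  ++RI;                                    // RI now refers to the 2
  std::list<int>::reverse_iterator Next = RI;
  ++Next;                                  // one step further in reverse
  assert(*Next.base() == *RI);             // next(RI).base() points at *RI
  return 0;
}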
- for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
- const MachineOperand &MO = candidate->getOperand(i);
- unsigned Reg;
+bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
+ // Can handle only calls.
+ if (DisableForwardSearch || !Slot->isCall())
+ return false;
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue; // skip
+ RegDefsUses RegDU(TM);
+ NoMemInstr NM;
+ Iter Filler;
- if (MO.isDef()) {
- // check whether Reg is defined or used before delay slot.
- if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
- return true;
- }
- if (MO.isUse()) {
- // check whether Reg is defined before delay slot.
- if (IsRegInSet(RegDefs, Reg))
- return true;
- }
+ RegDU.setCallerSaved(*Slot);
+
+ if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
+
return false;
}
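
The forward search hoists an instruction from after the call into the call's delay slot, where it will execute before the call; seeding Defs with every caller-saved register therefore rejects any candidate that reads or writes a register the call could clobber or consume. A hypothetical MIPS example (O32 register roles assumed):

//   jal  callee                  jal  callee
//   nop                    =>    move $16, $17   # $s0/$s1 are callee-saved,
//   move $16, $17                                # so hoisting is safe
//
// By contrast, "move $16, $2" could not be hoisted: $2 ($v0) is caller-saved
// (here, the call's return value), so it conflicts with the seeded Defs set.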
-// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- // If MI is a call or return, just examine the explicit non-variadic operands.
- MCInstrDesc MCID = MI->getDesc();
- unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableSuccBBSearch)
+ return false;
+
+ MachineBasicBlock *SuccBB = selectSuccBB(MBB);
+
+ if (!SuccBB)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ bool HasMultipleSuccs = false;
+ BB2BrMap BrMap;
+ OwningPtr<InspectMemInstr> IM;
+ Iter Filler;
+
+ // Iterate over SuccBB's predecessor list.
+ for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
+ PE = SuccBB->pred_end(); PI != PE; ++PI)
+ if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ return false;
+
+ // Do not allow moving instructions which have unallocatable register operands
+ // across basic block boundaries.
+ RegDU.setUnallocatableRegs(*MBB.getParent());
+
+  // Only allow moving loads from the stack or constants if any of SuccBB's
+  // predecessors have multiple successors.
+ if (HasMultipleSuccs) {
+ IM.reset(new LoadFromStackOrConst());
+ } else {
+ const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
+ IM.reset(new MemDefsUses(MFI));
+ }
- // Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MI->isCall())
- RegDefs.insert(Mips::RA);
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ return false;
- for (unsigned i = 0; i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- unsigned Reg;
+ insertDelayFiller(Filler, BrMap);
+ addLiveInRegs(Filler, *SuccBB);
+ Filler->eraseFromParent();
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue;
+ return true;
+}
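
When the filler comes from a successor block, three things happen: insertDelayFiller bundles a clone into each predecessor's branch (or appends it to fall-through predecessors), addLiveInRegs records the clone's defs as live-in to the successor, and the original instruction is erased. Schematically, again with hypothetical assembly:

// Before (Filler = "addiu $16, $16, 1"):   After:
//
//   Pred1: beq $4, $5, Succ                  Pred1: beq $4, $5, Succ
//          nop                                      addiu $16, $16, 1  # slot
//   Pred2: ... (falls through)               Pred2: ...
//                                                   addiu $16, $16, 1  # end
//   Succ:  addiu $16, $16, 1                 Succ:  # $16 now a live-in
//          ...                                      ...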
+
+MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
+ if (B.succ_empty())
+ return NULL;
+
+  // Select the successor with the largest edge weight.
+ CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>());
+ MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp);
+ return S->isLandingPad() ? NULL : S;
+}
+
+std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MipsInstrInfo::BranchType R =
+ TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
+
+ if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
+ return std::make_pair(R, (MachineInstr*)NULL);
+
+ if (R != MipsInstrInfo::BT_CondUncond) {
+ if (!hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+
+ assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
+
+ return std::make_pair(R, BranchInstrs[0]);
+ }
+
+ assert((TrueBB == &Dst) || (FalseBB == &Dst));
- if (MO.isDef())
- RegDefs.insert(Reg);
- else if (MO.isUse())
- RegUses.insert(Reg);
+ // Examine the conditional branch. See if its slot is occupied.
+ if (hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
+
+ // If that fails, try the unconditional branch.
+ if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
+ return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
+
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+}
+
+bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const {
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
+ getBranch(Pred, Succ);
+
+  // Return false if getBranch was unable to analyze the branches or if there
+  // were no branches with unoccupied slots.
+ if (P.first == MipsInstrInfo::BT_None)
+ return false;
+
+ if ((P.first != MipsInstrInfo::BT_Uncond) &&
+ (P.first != MipsInstrInfo::BT_NoBranch)) {
+ HasMultipleSuccs = true;
+ RegDU.addLiveOut(Pred, Succ);
}
+
+ BrMap[&Pred] = P.second;
+ return true;
}
-//returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
- // Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
- AI.isValid(); ++AI)
- if (RegSet.count(*AI))
- return true;
- return false;
+bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const {
+ bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+
+ HasHazard |= IM.hasHazard(Candidate);
+ HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
+
+ return HasHazard;
+}
+
+bool Filler::terminateSearch(const MachineInstr &Candidate) const {
+ return (Candidate.isTerminator() || Candidate.isCall() ||
+ Candidate.isLabel() || Candidate.isInlineAsm() ||
+ Candidate.hasUnmodeledSideEffects());
}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 2cad2a6264ab..eb9d49fefb2f 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#include "MipsFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index df52d92da830..6a5f79d0dfc4 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -26,9 +26,8 @@ protected:
const MipsSubtarget &STI;
public:
- explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
- sti.hasMips64() ? 16 : 8), STI(sti) {}
+ explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
+ : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
static const MipsFrameLowering *create(MipsTargetMachine &TM,
const MipsSubtarget &ST);
@@ -39,7 +38,7 @@ protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
-/// Create MipsInstrInfo objects.
+/// Create MipsFrameLowering objects.
const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index c5fca7f4b27a..77b08cb11e0c 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,29 +12,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -45,263 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
@@ -313,230 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
 /// ComplexPattern used by MipsInstrInfo.
 /// Used by Mips load/store instructions.
-bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64())
- return false;
- }
-
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
+bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -547,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -716,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -766,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 000000000000..cf0f9c58aa9c
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b0dd0a766f70..e2219f257ecd 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -11,20 +11,14 @@
// selection DAG.
//
//===----------------------------------------------------------------------===//
-
#define DEBUG_TYPE "mips-lower"
#include "MipsISelLowering.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
-#include "MipsSubtarget.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -33,6 +27,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -43,10 +41,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
@@ -67,7 +61,7 @@ static const uint16_t Mips64DPRegs[8] = {
// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
-static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
return false;
@@ -76,7 +70,7 @@ static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
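
The example in the comment above is easy to verify: 0x003ff800 has eleven trailing zero bits and eleven contiguous set bits. A stand-alone check using GCC/Clang builtins in place of LLVM's bit-twiddling helpers (an assumption about the reader's toolchain):

#include <cassert>
#include <stdint.h>

int main() {
  uint64_t I = 0x003ff800;
  unsigned Pos = __builtin_ctzll(I);        // index of first set bit -> 11
  unsigned Size = __builtin_popcountll(I);  // number of set bits     -> 11
  assert(Pos == 11 && Size == 11);
  uint64_t M = I >> Pos;                    // shifted-down mask
  assert((M & (M + 1)) == 0);               // set bits are contiguous
  return 0;
}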
-static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -111,11 +105,12 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, GOTFlag));
SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
MachinePointerInfo::getGOT(), false, false, false,
@@ -125,21 +120,23 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
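
For orientation, the node sequence getAddrLocal builds corresponds to a GOT load followed by a low-part add. On O32 the flags assumed here (not all visible in this hunk) produce roughly:

// Schematic of the emitted sequence for a local symbol (O32, hypothetical):
//
//   lw    $2, %got(sym)($gp)   # Wrapper(getGlobalReg, GOT entry) + load
//   addiu $2, $2, %lo(sym)     # ISD::ADD with the MipsISD::Lo node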
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
- SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, Flag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
- Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, GetGlobalReg(DAG, Ty));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(Op, DAG, LoFlag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
@@ -155,21 +152,27 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
+ case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::Mult: return "MipsISD::Mult";
+ case MipsISD::Multu: return "MipsISD::Multu";
case MipsISD::MAdd: return "MipsISD::MAdd";
case MipsISD::MAddu: return "MipsISD::MAddu";
case MipsISD::MSub: return "MipsISD::MSub";
case MipsISD::MSubu: return "MipsISD::MSubu";
case MipsISD::DivRem: return "MipsISD::DivRem";
case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::DivRem16: return "MipsISD::DivRem16";
+ case MipsISD::DivRemU16: return "MipsISD::DivRemU16";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::Wrapper: return "MipsISD::Wrapper";
- case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
@@ -191,7 +194,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
case MipsISD::MULT: return "MipsISD::MULT";
case MipsISD::MULTU: return "MipsISD::MULTU";
- case MipsISD::MADD_DSP: return "MipsISD::MADD_DSPDSP";
+ case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
@@ -205,50 +208,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
  // Mips does not have an i1 type, so use i32 for
  // setcc operation results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
  // Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -265,6 +229,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
// Mips Custom Operations
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -281,18 +246,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -330,8 +283,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Operations not directly supported by Mips.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
@@ -361,6 +316,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
@@ -383,6 +340,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -399,21 +358,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -438,8 +382,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTruncStoreAction(MVT::i64, MVT::i32, Custom);
}
- setTargetDAGCombine(ISD::ADDE);
- setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
setTargetDAGCombine(ISD::SELECT);
@@ -450,206 +392,27 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
- maxStoresPerMemcpy = 16;
+ MaxStoresPerMemcpy = 16;
}
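+// The Mips16- and DSP-specific setup that used to live in this constructor
+// (register classes, expanded atomics, custom i32 load/store handling) has
+// moved into the subclasses produced by the create() factory below.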
-bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
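+// Expected to be called from MipsTargetMachine when it builds its lowering
+// object; a rough sketch of the call site (the exact member name may
+// differ):
+//   const MipsTargetLowering *TLI = MipsTargetLowering::create(TM);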
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i32;
-}
-
-// SelectMadd -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
- // ADDENode's second operand must be a flag output of an ADDC node in order
- // for the matching to be successful.
- SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
- if (ADDCNode->getOpcode() != ISD::ADDC)
- return false;
-
- SDValue MultHi = ADDENode->getOperand(0);
- SDValue MultLo = ADDCNode->getOperand(0);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo amd MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MADD only if ADDENode and ADDCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than ADDENode or ADDCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MADD instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = ADDENode->getDebugLoc();
-
- // create MipsMAdd(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
- SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ADDCNode->getOperand(1),// Lo0
- ADDENode->getOperand(1));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
- MAdd);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of adde and addc here
- if (!SDValue(ADDCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
-
- if (!SDValue(ADDENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
-
- return true;
-}
-
-// SelectMsub -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
- // SUBENode's second operand must be a flag output of an SUBC node in order
- // for the matching to be successful.
- SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
- if (SUBCNode->getOpcode() != ISD::SUBC)
- return false;
-
- SDValue MultHi = SUBENode->getOperand(1);
- SDValue MultLo = SUBCNode->getOperand(1);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo amd MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than SUBENode or SUBCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MSUB instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = SUBENode->getDebugLoc();
-
- // create MipsSub(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
- SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- SUBCNode->getOperand(0),// Lo0
- SUBENode->getOperand(0));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
- MSub);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of sube and subc here
- if (!SDValue(SUBCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
-
- if (!SDValue(SUBENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
-
- return true;
-}
-
-static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMadd(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
-static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMsub(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
}
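+// Scalar compares still produce an i32 0/1; vector compares (e.g. the DSP
+// v2i16/v4i8 types) now yield a vector of booleans with the same shape.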
-static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -658,18 +421,18 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
EVT Ty = N->getValueType(0);
unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
- unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
- MipsISD::DivRemU;
- DebugLoc dl = N->getDebugLoc();
+ unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
+ MipsISD::DivRemU16;
+ DebugLoc DL = N->getDebugLoc();
- SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
N->getOperand(0), N->getOperand(1));
SDValue InChain = DAG.getEntryNode();
SDValue InGlue = DivRem;
// insert MFLO
if (N->hasAnyUseOfValue(0)) {
- SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty,
InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
InChain = CopyFromLo.getValue(1);
@@ -678,7 +441,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
// insert MFHI
if (N->hasAnyUseOfValue(1)) {
- SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL,
HI, Ty, InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
}
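+// Net effect: one (s|u)divrem becomes a single DivRem(U)16 node defining
+// HI/LO, and CopyFromReg (mflo/mfhi) nodes are emitted only for the
+// quotient/remainder results that are actually used.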
@@ -713,8 +476,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
}
-// Returns true if condition code has to be inverted.
-static bool InvertFPCondCode(Mips::CondCode CC) {
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
@@ -726,7 +490,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
// Creates and returns an FPCmp node from a setcc node.
// Returns Op if setcc is not a floating point comparison.
-static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// must be a SETCC node
if (Op.getOpcode() != ISD::SETCC)
return Op;
@@ -737,28 +501,27 @@ static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
return Op;
SDValue RHS = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
  // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
  // the node if necessary.
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
}
// Creates and returns a CMovFPT/F node.
-static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
+static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
- bool invert = InvertFPCondCode((Mips::CondCode)
- cast<ConstantSDNode>(Cond.getOperand(2))
- ->getSExtValue());
+ ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+ bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue());
return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
True.getValueType(), True, False, Cond);
}
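+// For example, (select (setcc f32 a, b, setolt), t, f) becomes
+// CMovFP_T(t, f, FPCmp(a, b, OLT)); condition codes in the inverted range
+// select CMovFP_F instead, so the compare itself never has to be negated.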
-static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -791,7 +554,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match EXT.
@@ -817,7 +580,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
// Return if the shifted mask does not start at bit 0 or the sum of its size
@@ -831,7 +594,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize, MVT::i32));
}
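+// i.e. (and (srl $src, pos), mask), where mask is (2**size - 1), is
+// rewritten as a single (ext $src, pos, size).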
-static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match INS.
@@ -850,7 +613,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
- !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
return SDValue();
// See if Op's second operand matches (and (shl $src, pos), mask1).
@@ -858,7 +621,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
// The shift masks must have the same position and size.
@@ -885,7 +648,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
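+// i.e. (or (and $dst, inv_mask), (and (shl $src, pos), mask)), where both
+// masks select the same shifted bit range, is rewritten as a single
+// (ins $src, pos, size, $dst).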
-static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
@@ -915,25 +678,21 @@ static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
- unsigned opc = N->getOpcode();
+ unsigned Opc = N->getOpcode();
- switch (opc) {
+ switch (Opc) {
default: break;
- case ISD::ADDE:
- return PerformADDECombine(N, DAG, DCI, Subtarget);
- case ISD::SUBE:
- return PerformSUBECombine(N, DAG, DCI, Subtarget);
case ISD::SDIVREM:
case ISD::UDIVREM:
- return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ return performSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
- return PerformANDCombine(N, DAG, DCI, Subtarget);
+ return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
- return PerformORCombine(N, DAG, DCI, Subtarget);
+ return performORCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:
- return PerformADDCombine(N, DAG, DCI, Subtarget);
+ return performADDCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -964,30 +723,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::FABS: return LowerFABS(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true);
- case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::BR_JT: return lowerBR_JT(Op, DAG);
+ case ISD::BRCOND: return lowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return lowerJumpTable(Op, DAG);
+ case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return lowerSETCC(Op, DAG);
+ case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
+ case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
+ case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::ADD: return lowerADD(Op, DAG);
}
return SDValue();
}
@@ -996,287 +757,133 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// Lower helper functions
//===----------------------------------------------------------------------===//
-// AddLiveIn - This helper function adds the specified physical register to the
+// addLiveIn - This helper function adds the specified physical register to the
// MachineFunction as a live in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
+addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
- assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
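+// Typically called once per incoming physical argument register during
+// formal-argument lowering, so later code can read the argument through
+// the returned virtual register.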
-// Get fp branch code (not opcode) from condition code.
-static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
- if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
- return Mips::BRANCH_T;
-
- assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
- "Invalid CondCode.");
-
- return Mips::BRANCH_F;
-}
-
-/*
-static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
- DebugLoc dl,
- const MipsSubtarget *Subtarget,
- const TargetInstrInfo *TII,
- bool isFPCmp, unsigned Opc) {
- // There is no need to expand CMov instructions if target has
- // conditional moves.
- if (Subtarget->hasCondMov())
- return BB;
-
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // Emit the right instruction according to the type of the operands compared
- if (isFPCmp)
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- else
- BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
- .addReg(Mips::ZERO).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- if (isFPCmp)
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
- else
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-*/
-
-MachineBasicBlock *
-MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
case Mips::ATOMIC_LOAD_ADD_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
case Mips::ATOMIC_LOAD_ADD_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
case Mips::ATOMIC_LOAD_ADD_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I64:
case Mips::ATOMIC_LOAD_ADD_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DADDu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
case Mips::ATOMIC_LOAD_AND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
case Mips::ATOMIC_LOAD_AND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
case Mips::ATOMIC_LOAD_AND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+ return emitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I64:
case Mips::ATOMIC_LOAD_AND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::AND64);
+ return emitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
case Mips::ATOMIC_LOAD_OR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
case Mips::ATOMIC_LOAD_OR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
case Mips::ATOMIC_LOAD_OR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+ return emitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I64:
case Mips::ATOMIC_LOAD_OR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::OR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
case Mips::ATOMIC_LOAD_XOR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
case Mips::ATOMIC_LOAD_XOR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
case Mips::ATOMIC_LOAD_XOR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+ return emitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I64:
case Mips::ATOMIC_LOAD_XOR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::XOR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
case Mips::ATOMIC_LOAD_NAND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
case Mips::ATOMIC_LOAD_NAND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
case Mips::ATOMIC_LOAD_NAND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0, true);
+ return emitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_NAND_I64:
case Mips::ATOMIC_LOAD_NAND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0, true);
+ return emitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
case Mips::ATOMIC_LOAD_SUB_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
case Mips::ATOMIC_LOAD_SUB_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
case Mips::ATOMIC_LOAD_SUB_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I64:
case Mips::ATOMIC_LOAD_SUB_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
case Mips::ATOMIC_SWAP_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
case Mips::ATOMIC_SWAP_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
case Mips::ATOMIC_SWAP_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0);
+ return emitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_SWAP_I64:
case Mips::ATOMIC_SWAP_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0);
+ return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
case Mips::ATOMIC_CMP_SWAP_I8_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
case Mips::ATOMIC_CMP_SWAP_I16_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
case Mips::ATOMIC_CMP_SWAP_I32_P8:
- return EmitAtomicCmpSwap(MI, BB, 4);
+ return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
- return EmitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return EmitBPOSGE32(MI, BB);
+ return emitAtomicCmpSwap(MI, BB, 8);
}
}
// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
+  assert((Size == 4 || Size == 8) && "Unsupported size for emitAtomicBinary.");
@@ -1285,7 +892,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
@@ -1341,20 +948,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc success, storeval, 0(ptr)
// beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
// and andres, oldval, incr
// nor storeval, $0, andres
- BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
- BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
+ BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
} else if (BinOpcode) {
// <binop> storeval, oldval, incr
- BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
+ BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
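+  // This is the standard MIPS ll/sc idiom: if another thread writes the
+  // location between the load-linked and the store-conditional, sc writes 0
+  // to Success and the beq loops back to retry the read-modify-write.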
MI->eraseFromParent(); // The instruction is gone now.
@@ -1362,7 +969,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
@@ -1373,7 +980,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -1432,18 +1039,18 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// sll incr2,incr,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
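+  // At this point AlignedAddr is the enclosing word, ShiftAmt is the bit
+  // offset of the addressed byte/halfword, Mask/Mask2 select and clear that
+  // field, and Incr2 is the operand pre-shifted into position, so the
+  // word-wide ll/sc loop below only touches the addressed sub-word.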
// atomic.load.binop
// loopMBB:
@@ -1465,32 +1072,32 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes)
.addReg(Mips::ZERO).addReg(AndRes);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else if (BinOpcode) {
// <binop> binopres, oldval, incr2
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
  } else { // atomic.swap
// and newval, incr2, mask
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
// sinkMBB:
@@ -1501,13 +1108,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal1);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
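+  // The srlv/sll/sra sequence moves the updated field back down to bit 0
+  // and sign-extends it to 32 bits, matching what a plain lb/lh of the
+  // result would produce.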
MI->eraseFromParent(); // The instruction is gone now.
@@ -1516,7 +1123,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
+  assert((Size == 4 || Size == 8) && "Unsupported size for emitAtomicCmpSwap.");
@@ -1525,7 +1132,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
@@ -1579,17 +1186,17 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BNE))
+ BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ))
+ BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
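+  // Two-block structure: loop1MBB reloads the current value and branches to
+  // exitMBB on a value mismatch; loop2MBB attempts the sc and restarts from
+  // loop1MBB if the store-conditional failed.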
MI->eraseFromParent(); // The instruction is gone now.
@@ -1598,7 +1205,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 1 || Size == 2) &&
@@ -1608,7 +1215,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -1675,24 +1282,24 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// andi maskednewval,newval,255
// sll shiftednewval,maskednewval,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal)
.addReg(CmpVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
.addReg(ShiftAmt).addReg(MaskedCmpVal);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal)
.addReg(NewVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
.addReg(ShiftAmt).addReg(MaskedNewVal);
// loop1MBB:
@@ -1700,10 +1307,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::BNE))
+ BuildMI(BB, DL, TII->get(Mips::BNE))
.addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// loop2MBB:
@@ -1712,13 +1319,13 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// sc success,storeval,0(alignedaddr)
// beq success,$0,loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
// sinkMBB:
@@ -1728,11 +1335,11 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal0);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1743,16 +1350,46 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
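+// BR_JT is custom-lowered (see the constructor) so the jump-table entry
+// can be fetched with a sign-extending load and, under PIC or N64, rebased
+// on getPICJumpTableRelocBase before the indirect branch.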
+SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PTy = getPointerTy();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
+
+ Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
+ DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT, false, false,
+ 0);
+ Chain = Addr.getValue(1);
+
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || IsN64) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr,
+ getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
+}
+
SDValue MipsTargetLowering::
-LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
SDValue Dest = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
if (CondRes.getOpcode() != MipsISD::FPCmp)
@@ -1761,27 +1398,27 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
- SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
-
- return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
+ SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
+ return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
Dest, CondRes);
}
SDValue MipsTargetLowering::
-LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+ SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
if (Cond.getOpcode() != MipsISD::FPCmp)
return Op;
- return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
Op.getDebugLoc());
}
SDValue MipsTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getOperand(0).getValueType();
@@ -1793,8 +1430,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(3));
}
-SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond = CreateFPCmp(DAG, Op);
+SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
"Floating point operand expected.");
@@ -1802,13 +1439,13 @@ SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue True = DAG.getConstant(1, MVT::i32);
SDValue False = DAG.getConstant(0, MVT::i32);
- return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+ return createCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
}
-SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
@@ -1817,12 +1454,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl,
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
DAG.getVTList(MVT::i32), &GA, 1);
SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode);
+ return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
}
// %hi/%lo relocation
@@ -1840,7 +1477,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
}
-SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -1849,14 +1486,14 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
}
SDValue MipsTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
// If the relocation model is PIC, use the General Dynamic TLS Model or
// Local Dynamic TLS model, otherwise use the Initial Exec or
// Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- DebugLoc dl = GA->getDebugLoc();
+ DebugLoc DL = GA->getDebugLoc();
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
@@ -1867,9 +1504,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM
: MipsII::MO_TLSGD;
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
- SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
- GetGlobalReg(DAG, PtrVT), TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT,
+ getGlobalReg(DAG, PtrVT), TGA);
unsigned PtrSize = PtrVT.getSizeInBits();
IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
@@ -1883,9 +1520,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*doesNotRet=*/false,
+ /*IsTailCall=*/false, /*doesNotRet=*/false,
/*isReturnValueUsed=*/true,
- TlsGetAddr, Args, DAG, dl);
+ TlsGetAddr, Args, DAG, DL);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1893,44 +1530,44 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (model != TLSModel::LocalDynamic)
return Ret;
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_HI);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
- return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo);
}
SDValue Offset;
if (model == TLSModel::InitialExec) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_GOTTPREL);
- TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT),
TGA);
- Offset = DAG.getLoad(PtrVT, dl,
+ Offset = DAG.getLoad(PtrVT, DL,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
assert(model == TLSModel::LocalExec);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
- SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
- return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
}
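+// Summary of the two non-call models above: Initial Exec loads the thread
+// offset from the GOT (%gottprel), Local Exec builds it directly from
+// %tprel_hi/%tprel_lo; both then add MipsISD::ThreadPointer.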
SDValue MipsTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -1939,7 +1576,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
// gp_rel relocation
// FIXME: we should reference the constant pool using small data sections,
@@ -1957,22 +1594,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
return getAddrLocal(Op, DAG, HasMips64);
}
-SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
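// For context, a sketch of the source-level construct this single store
// serves; standard C varargs, nothing here is specific to the patch:
#include <cstdarg>

int sum(int n, ...) {
  va_list ap;
  va_start(ap, n);          // lowered by lowerVASTART to one pointer store
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);   // walks the area writeVarArgRegs spilled
  va_end(ap);
  return s;
}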
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
SDValue Const1 = DAG.getConstant(1, MVT::i32);
@@ -2017,7 +1654,7 @@ static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
@@ -2066,14 +1703,14 @@ static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64())
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
}
-static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2102,7 +1739,7 @@ static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2123,15 +1760,15 @@ static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return LowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
}
SDValue MipsTargetLowering::
-LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
"Frame address can only be determined for current frame.");
@@ -2139,13 +1776,13 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
IsN64 ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
-SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
@@ -2153,7 +1790,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
@@ -2162,26 +1799,54 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
}
+// An EH_RETURN is the result of lowering llvm.eh.return, which in turn is
+// generated from __builtin_eh_return (offset, handler).
+// The effect of this is to adjust the stack pointer by "offset"
+// and then branch to "handler".

+SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setCallsEhReturn();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+
+ // Store stack offset in V1, store jump target in V0. Glue CopyToReg and
+ // EH_RETURN nodes, so that instructions are emitted back-to-back.
+ unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0;
+ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+ return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
+ DAG.getRegister(OffsetReg, Ty),
+ DAG.getRegister(AddrReg, getPointerTy()),
+ Chain.getValue(1));
+}
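// A hedged usage sketch of the builtin this lowers; the function name below
// is hypothetical, while __builtin_eh_return is the documented GCC/Clang
// builtin with this (offset, handler) signature:
extern "C" void resume_to_handler(long offset, void *handler) {
  // Adjusts the stack pointer by `offset` and branches to `handler`;
  // per the lowering above, offset lands in V1 and handler in V0.
  __builtin_eh_return(offset, handler);
}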
+
// TODO: set SType according to the desired memory barrier behavior.
SDValue
-MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
-SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
// FIXME: Set SType for weaker fences where supported/appropriate.
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
-SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
+SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2212,7 +1877,7 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2271,7 +1936,7 @@ static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
}
// Expand an unaligned 32 or 64-bit integer load node.
-SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -2349,7 +2014,7 @@ static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
}
// Expand an unaligned 32 or 64-bit integer store node.
-SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *SD = cast<StoreSDNode>(Op);
EVT MemVT = SD->getMemoryVT();
@@ -2385,6 +2050,22 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
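// Taken together, these two helpers give a round trip through the HI/LO
// accumulator; a sketch (helper name is illustrative):
static SDValue roundTripThroughAcc(SDValue In64, DebugLoc DL,
                                   SelectionDAG &DAG) {
  SDValue Acc = initAccumulator(In64, DL, DAG); // i64 -> MVT::Untyped {LO,HI}
  return extractLOHI(Acc, DL, DAG);             // MVT::Untyped -> i64
}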
+
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
@@ -2397,140 +2078,143 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
-static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
- unsigned Opc, bool HasI64In, bool HasI64Out) {
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
DebugLoc DL = Op.getDebugLoc();
bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
- SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
SmallVector<SDValue, 3> Ops;
+ unsigned OpNo = 0;
- if (HasI64In) {
- SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(0, MVT::i32));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(1, MVT::i32));
+ // See if Op has a chain input.
+ if (HasChainIn)
+ Ops.push_back(Op->getOperand(OpNo++));
- Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
- Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
+ // The next operand is the intrinsic opcode.
+ assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
- Ops.push_back(Chain.getValue(1));
- } else {
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
- }
+ // See if the next operand has type i64.
+ SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+ if (Opnd.getValueType() == MVT::i64)
+ In64 = initAccumulator(Opnd, DL, DAG);
+ else
+ Ops.push_back(Opnd);
- if (!HasI64Out)
- return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
- Ops.begin(), Ops.size());
+ // Push the remaining operands.
+ for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
+ Ops.push_back(Op->getOperand(OpNo));
- SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
- Ops.begin(), Ops.size());
- SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
- Intr.getValue(1));
- SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
- OutLo.getValue(2));
- SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
+ // Add In64 to the end of the list.
+ if (In64.getNode())
+ Ops.push_back(In64);
+
+ // Scan output.
+ SmallVector<EVT, 2> ResTys;
+
+ for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+ I != E; ++I)
+ ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+ // Create node.
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
if (!HasChainIn)
return Out;
- SDValue Vals[] = { Out, OutHi.getValue(1) };
+ assert(Val->getValueType(1) == MVT::Other);
+ SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
return DAG.getMergeValues(Vals, 2, DL);
}
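// To make the rewrite concrete, a hedged trace for int_mips_madd (i64
// accumulator in, i64 out), following the steps above:
//   acc   = InsertLOHI(extract_element(in64, 0), extract_element(in64, 1))
//   val   = MipsISD::MAdd(a, b, acc)          // ResTys[0] == MVT::Untyped
//   out64 = BUILD_PAIR(ExtractLOHI(val, sub_lo), ExtractLOHI(val, sub_hi))
static SDValue lowerMAddExample(SDValue Op, SelectionDAG &DAG) {
  // Operand order after the loop above is { a, b, acc }: the original i64
  // operand is converted by initAccumulator and appended last.
  return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
}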
-SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_shilo:
- return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
case Intrinsic::mips_dpau_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
case Intrinsic::mips_dpau_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
case Intrinsic::mips_dpsu_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
case Intrinsic::mips_dpsu_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
case Intrinsic::mips_dpa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
case Intrinsic::mips_dps_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
case Intrinsic::mips_dpax_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
case Intrinsic::mips_dpsx_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
case Intrinsic::mips_mulsa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
case Intrinsic::mips_mult:
- return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Mult);
case Intrinsic::mips_multu:
- return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Multu);
case Intrinsic::mips_madd:
- return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
case Intrinsic::mips_maddu:
- return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
case Intrinsic::mips_msub:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSub);
case Intrinsic::mips_msubu:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
}
}
-SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_extp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
case Intrinsic::mips_extpdp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
case Intrinsic::mips_extr_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
case Intrinsic::mips_extr_r_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
case Intrinsic::mips_extr_rs_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
case Intrinsic::mips_extr_s_h:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
case Intrinsic::mips_mthlip:
- return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
case Intrinsic::mips_mulsaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
case Intrinsic::mips_maq_s_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
case Intrinsic::mips_maq_s_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
case Intrinsic::mips_maq_sa_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
case Intrinsic::mips_maq_sa_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
case Intrinsic::mips_dpaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
case Intrinsic::mips_dpsq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
case Intrinsic::mips_dpaq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
case Intrinsic::mips_dpsq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
case Intrinsic::mips_dpaqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
case Intrinsic::mips_dpaqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
case Intrinsic::mips_dpsqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
case Intrinsic::mips_dpsqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
}
}
-SDValue MipsTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR
|| cast<ConstantSDNode>
(Op->getOperand(0).getOperand(0))->getZExtValue() != 0
@@ -2667,28 +2351,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -2707,21 +2369,65 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+}
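// A hedged recap of the call site in LowerCall below (the claim about the
// callee operand is an inference from the virtual declaration in the header
// and the T9 code deleted further down, not stated by the patch):
SmallVector<SDValue, 8> Ops(1, Chain);           // Ops[0] is the chain
getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
            CLI, Callee, Chain);
// On return, Ops holds the chain, the argument registers (including the GP
// copy when PIC), the call-preserved register mask, and trailing glue when
// copies were emitted; subtarget overrides of this virtual hook presumably
// slot in the callee/T9 operand, taking over the old logic removed below.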
+
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+ DebugLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
+ bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2730,22 +2436,24 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- MipsCCInfo.analyzeCallOperands(Outs);
+ MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
+ getTargetMachine().Options.UseSoftFloat,
+ Callee.getNode(), CLI.Args);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
- if (isTailCall)
- isTailCall =
- IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
+ if (IsTailCall)
+ IsTailCall =
+ isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
*MF.getInfo<MipsFunctionInfo>());
- if (isTailCall)
+ if (IsTailCall)
++NumTailCalls;
// Chain is the output chain of the last Load/Store or CopyToReg node.
@@ -2755,15 +2463,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- if (!isTailCall)
+ if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL,
IsN64 ? Mips::SP_64 : Mips::SP,
getPointerTy());
// With EABI it is possible to have 16 args in registers.
- SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ std::deque< std::pair<unsigned, SDValue> > RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -2779,9 +2487,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- assert(!isTailCall &&
+ assert(!IsTailCall &&
"Do not tail-call optimize if there is a byval argument.");
- passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle());
++ByValArg;
continue;
@@ -2793,12 +2501,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::Full:
if (VA.isRegLoc()) {
if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
- (ValVT == MVT::f64 && LocVT == MVT::i64))
- Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg);
+ (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+ (ValVT == MVT::i64 && LocVT == MVT::f64))
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(0, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
@@ -2811,13 +2520,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
}
@@ -2834,25 +2543,27 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// emit ISD::STORE which stores the
// parameter value to a stack location
MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
- Chain, Arg, dl, isTailCall, DAG));
+ Chain, Arg, DL, IsTailCall, DAG));
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool GlobalOrExternal = false;
+ bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
- if (G->getGlobal()->hasInternalLinkage())
+ InternalLinkage = G->getGlobal()->hasInternalLinkage();
+
+ if (InternalLinkage)
Callee = getAddrLocal(Callee, DAG, HasMips64);
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
@@ -2860,7 +2571,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
} else
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
@@ -2871,84 +2582,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16);
- else if (HasMips64)
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP);
- else // O32 & PIC
+ else // N64 || PIC
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
GlobalOrExternal = true;
}
- SDValue InFlag;
-
- // T9 register operand.
- SDValue T9;
-
- // T9 should contain the address of the callee function if
- // -reloction-model=pic or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- // copy to T9
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
- InFlag = Chain.getValue(1);
-
- if (Subtarget->inMips16Mode())
- T9 = DAG.getRegister(T9Reg, getPointerTy());
- else
- Callee = DAG.getRegister(T9Reg, getPointerTy());
- }
-
- // Insert node "GP copy globalreg" before call to function.
- // Lazy-binding stubs require GP to point to the GOT.
- if (IsPICCall) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
- // stuck together.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
+ SmallVector<SDValue, 8> Ops(1, Chain);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
- // Add T9 register operand.
- if (T9.getNode())
- Ops.push_back(T9);
+ if (IsTailCall)
+ return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- if (isTailCall)
- return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size());
-
- Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -2957,31 +2607,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
+ Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode,
+ const Type *RetTy) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+ MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ CallNode, RetTy);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
- InVals.push_back(Chain.getValue(0));
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
+ RVLocs[i].getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+
+ InVals.push_back(Val);
}
return Chain;
@@ -2995,9 +2654,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
- bool isVarArg,
+ bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3011,16 +2670,17 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
- MipsCCInfo.analyzeFormalArguments(Ins);
+ MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
MipsCCInfo.hasByValArg());
- Function::const_arg_iterator FuncArg =
- DAG.getMachineFunction().getFunction()->arg_begin();
unsigned CurArgIdx = 0;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -3036,7 +2696,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg,
+ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
MipsCCInfo, *ByValArg);
++ByValArg;
continue;
@@ -3049,7 +2709,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = &Mips::CPURegsRegClass;
+ RC = Subtarget->inMips16Mode() ? &Mips::CPU16RegsRegClass :
+ &Mips::CPURegsRegClass;
else if (RegVT == MVT::i64)
RC = &Mips::CPU64RegsRegClass;
else if (RegVT == MVT::f32)
@@ -3061,8 +2722,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Transform the arguments stored on
// physical registers into virtual ones
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
@@ -3074,22 +2735,24 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
Opcode = ISD::AssertZext;
if (Opcode)
- ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+ ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
}
- // Handle floating point arguments passed in integer registers.
+ // Handle floating point arguments passed in integer registers and
+ // long double arguments passed in floating point registers.
if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue);
+ (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+ (RegVT == MVT::f64 && ValVT == MVT::i64))
+ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
if (!Subtarget->isLittle())
std::swap(ArgValue, ArgValue2);
- ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64,
ArgValue, ArgValue2);
}
@@ -3105,7 +2768,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0));
}
@@ -3121,18 +2784,18 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
}
- if (isVarArg)
- writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG);
+ if (IsVarArg)
+ writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens on vararg functions.
if (!OutChains.empty()) {
OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&OutChains[0], OutChains.size());
}
@@ -3145,80 +2808,80 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
bool
MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
+ MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
+ DebugLoc DL, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- // Analize return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
-
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ // Analyze return values.
+ MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MF.getFunction()->getReturnType());
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
- // guarantee that all emitted copies are
- // stuck together, avoiding something bad
+ // Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// The mips ABIs for returning structs by value require that we copy
// the sret argument into $v0 for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into $v0.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getFunction()->hasStructRetAttr()) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0;
- Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
- MF.getRegInfo().addLiveOut(V0);
+ RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
}
- // Return on Mips is always a "jr $ra"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain);
+ // Return on Mips is always a "jr $ra"
+ return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
@@ -3251,6 +2914,8 @@ getConstraintType(const std::string &Constraint) const
case 'l':
case 'x':
return C_RegisterClass;
+ case 'R':
+ return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -3299,6 +2964,9 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (isa<ConstantInt>(CallOperandVal))
weight = CW_Constant;
break;
+ case 'R':
+ weight = CW_Memory;
+ break;
}
return weight;
}
@@ -3448,13 +3116,34 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
bool
+MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
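// Hedged illustration of what the hook above admits, assuming a
// TargetLowering reference TLI and some Type *Ty (both placeholders):
TargetLowering::AddrMode AM;
AM.BaseGV = 0;
AM.HasBaseReg = true;
AM.Scale = 0;
AM.BaseOffs = 8;                                 // plain "r+i", e.g. 8($sp)
bool RegImm = TLI.isLegalAddressingMode(AM, Ty); // true
AM.Scale = 1;                                    // index reg => "r+r(+i)"
bool RegReg = TLI.isLegalAddressingMode(AM, Ty); // false on Mips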
+
+bool
MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
return false;
}
EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (Subtarget->hasMips64())
@@ -3478,40 +3167,62 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
-MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg,
- bool IsO32, CCState &Info) : CCInfo(Info) {
- UseRegsForByval = true;
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] =
+ {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
+ "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
+ "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
+ "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
+ "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
+ "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
- if (IsO32) {
- RegSize = 4;
- NumIntArgRegs = array_lengthof(O32IntRegs);
- ReservedArgArea = 16;
- IntArgRegs = ShadowRegs = O32IntRegs;
- FixedFn = VarFn = CC_MipsO32;
- } else {
- RegSize = 8;
- NumIntArgRegs = array_lengthof(Mips64IntRegs);
- ReservedArgArea = 0;
- IntArgRegs = Mips64IntRegs;
- ShadowRegs = Mips64DPRegs;
- FixedFn = CC_MipsN;
- VarFn = CC_MipsN_VarArg;
- }
+ const char * const *End = LibCalls + array_lengthof(LibCalls);
- if (CallConv == CallingConv::Fast) {
- assert(!IsVarArg);
- UseRegsForByval = false;
- ReservedArgArea = 0;
- FixedFn = VarFn = CC_Mips_FastCC;
- }
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char * const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
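// Quick checks against the table above; lookup is a std::binary_search with
// the same strcmp-based LTStr comparator, hence the sorted-order assertion:
assert(isF128SoftLibCall("__addtf3"));  // compiler-rt f128 add helper
assert(isF128SoftLibCall("sqrtl"));     // long double libm entry point
assert(!isF128SoftLibCall("memcpy"));   // not an f128 emulation routine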
+
+/// This function returns true if Ty is fp128 or an i128 that was originally
+/// an fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
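// And the combined predicate, sketched; the types and the symbol node are
// constructed here purely for illustration:
static void originalTypeIsF128Sketch(LLVMContext &Ctx, SelectionDAG &DAG) {
  assert(originalTypeIsF128(Type::getFP128Ty(Ctx), 0));
  SDNode *MulTF3 = DAG.getExternalSymbol("__multf3", MVT::i32).getNode();
  assert(originalTypeIsF128(Type::getIntNTy(Ctx, 128), MulTF3)); // softened
  assert(!originalTypeIsF128(Type::getInt64Ty(Ctx), MulTF3));    // plain i64
}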
+
+MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
+ CCState &Info)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_) {
// Pre-allocate reserved argument area.
- CCInfo.AllocateStack(ReservedArgArea, 1);
+ CCInfo.AllocateStack(reservedArgArea(), 1);
}
void MipsTargetLowering::MipsCC::
-analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
+analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
+ bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs) {
+ assert((CallConv != CallingConv::Fast || !IsVarArg) &&
+ "CallingConv::Fast shouldn't be used for vararg functions.");
+
unsigned NumOpnds = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
for (unsigned I = 0; I != NumOpnds; ++I) {
MVT ArgVT = Args[I].VT;
@@ -3523,10 +3234,13 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
continue;
}
- if (Args[I].IsFixed)
- R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else
+ if (IsVarArg && !Args[I].IsFixed)
R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else {
+ MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
+ IsSoftFloat);
+ R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
+ }
if (R) {
#ifndef NDEBUG
@@ -3539,19 +3253,26 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
}
void MipsTargetLowering::MipsCC::
-analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
+analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
+ bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
unsigned NumArgs = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn();
+ unsigned CurArgIdx = 0;
for (unsigned I = 0; I != NumArgs; ++I) {
MVT ArgVT = Args[I].VT;
ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
+ std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Args[I].OrigArgIndex;
if (ArgFlags.isByVal()) {
handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
continue;
}
- if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo))
+ MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+
+ if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
continue;
#ifndef NDEBUG
@@ -3562,6 +3283,44 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
}
}
+template<typename Ty>
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ CCAssignFn *Fn;
+
+ if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
+ Fn = RetCC_F128Soft;
+ else
+ Fn = RetCC_Mips;
+
+ for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
+ MVT VT = RetVals[I].VT;
+ ISD::ArgFlagsTy Flags = RetVals[I].Flags;
+ MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
+
+ if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << I << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
+ const Type *RetTy) const {
+ analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+}
+
void
MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
MVT LocVT,
@@ -3570,11 +3329,12 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
struct ByValArgInfo ByVal;
+ unsigned RegSize = regSize();
unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
RegSize * 2);
- if (UseRegsForByval)
+ if (useRegsForByval())
allocateRegs(ByVal, ByValSize, Align);
// Allocate space on caller's stack.
@@ -3585,9 +3345,38 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
ByValArgs.push_back(ByVal);
}
+unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
+ return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
+}
+
+unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
+ return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+ return IsO32 ? O32IntRegs : Mips64IntRegs;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
+ if (CallConv == CallingConv::Fast)
+ return CC_Mips_FastCC;
+
+ return IsO32 ? CC_MipsO32 : CC_MipsN;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
+ return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+ return IsO32 ? O32IntRegs : Mips64DPRegs;
+}
+
void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
unsigned ByValSize,
unsigned Align) {
+ unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
+ const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
"Byval argument's size and alignment should be a multiple of"
"RegSize.");
@@ -3606,6 +3395,21 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
}
+MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
+ const SDNode *CallNode,
+ bool IsSoftFloat) const {
+ if (IsSoftFloat || IsO32)
+ return VT;
+
+ // Check if the original type was fp128.
+ if (originalTypeIsF128(OrigTy, CallNode)) {
+ assert(VT == MVT::i64);
+ return MVT::f64;
+ }
+
+ return VT;
+}
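// A standalone mirror of this decision, hedged (names are illustrative;
// only N32/N64 hard-float remaps anything):
static MVT regVTSketch(MVT VT, bool OrigTyIsF128, bool IsSoftFloat,
                       bool IsO32) {
  if (IsSoftFloat || IsO32)
    return VT;                // no remapping on O32 or under soft-float
  if (OrigTyIsF128 && VT == MVT::i64)
    return MVT::f64;          // softened f128 halves ride in FP registers
  return VT;
}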
+
void MipsTargetLowering::
copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
@@ -3633,12 +3437,12 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
return;
// Copy arg registers.
- EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
- unsigned VReg = AddLiveIn(MF, ArgReg, RC);
+ unsigned VReg = addLiveIn(MF, ArgReg, RC);
unsigned Offset = I * CC.regSize();
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
DAG.getConstant(Offset, PtrTy));
@@ -3652,7 +3456,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
// Copy byVal arg to registers and stack.
void MipsTargetLowering::
passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
@@ -3755,7 +3559,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
const CCState &CCInfo = CC.getCCInfo();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
unsigned RegSize = CC.regSize();
- EVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ MVT RegTy = MVT::getIntegerVT(RegSize * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3780,7 +3584,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// in the caller's stack frame, while for N32/64, it is allocated in the
// callee's stack frame.
for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
- unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC);
+ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 43f97e89a7bf..cab71a61e07a 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -19,7 +19,10 @@
#include "MipsSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetLowering.h"
+#include <deque>
+#include <string>
namespace llvm {
namespace MipsISD {
@@ -63,6 +66,18 @@ namespace llvm {
// Return
Ret,
+ EH_RETURN,
+
+ // Node used to extract integer from accumulator.
+ ExtractLOHI,
+
+ // Node used to insert integers to accumulator.
+ InsertLOHI,
+
+ // Mult nodes.
+ Mult,
+ Multu,
+
// MAdd/Sub nodes
MAdd,
MAddu,
@@ -72,6 +87,8 @@ namespace llvm {
// DivRem(u)
DivRem,
DivRemU,
+ DivRem16,
+ DivRemU16,
BuildPairF64,
ExtractElementF64,
@@ -147,9 +164,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -172,7 +189,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
+
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
+
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
+
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -187,53 +231,80 @@ namespace llvm {
/// arguments and inquire about calling convention information.
class MipsCC {
public:
- MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32,
- CCState &Info);
+ MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info);
- void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs);
- void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins);
- void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags);
+ void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsVarArg, bool IsSoftFloat,
+ const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs);
+ void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat,
+ Function::const_arg_iterator FuncArg);
+
+ void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat, const SDNode *CallNode,
+ const Type *RetTy) const;
+
+ void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsSoftFloat, const Type *RetTy) const;
const CCState &getCCInfo() const { return CCInfo; }
/// hasByValArg - Returns true if function has byval arguments.
bool hasByValArg() const { return !ByValArgs.empty(); }
- /// useRegsForByval - Returns true if the calling convention allows the
- /// use of registers to pass byval arguments.
- bool useRegsForByval() const { return UseRegsForByval; }
-
    /// regSize - Size (in number of bytes) of integer registers.
- unsigned regSize() const { return RegSize; }
+ unsigned regSize() const { return IsO32 ? 4 : 8; }
/// numIntArgRegs - Number of integer registers available for calls.
- unsigned numIntArgRegs() const { return NumIntArgRegs; }
+ unsigned numIntArgRegs() const;
/// reservedArgArea - The size of the area the caller reserves for
    /// register arguments. This is 16 bytes if the ABI is O32.
- unsigned reservedArgArea() const { return ReservedArgArea; }
+ unsigned reservedArgArea() const;
- /// intArgRegs - Pointer to array of integer registers.
- const uint16_t *intArgRegs() const { return IntArgRegs; }
+ /// Return pointer to array of integer argument registers.
+ const uint16_t *intArgRegs() const;
typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator;
byval_iterator byval_begin() const { return ByValArgs.begin(); }
byval_iterator byval_end() const { return ByValArgs.end(); }
private:
+ void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags);
+
+ /// useRegsForByval - Returns true if the calling convention allows the
+ /// use of registers to pass byval arguments.
+ bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
+
+ /// Return the function that analyzes fixed argument list functions.
+ llvm::CCAssignFn *fixedArgFn() const;
+
+ /// Return the function that analyzes variable argument list functions.
+ llvm::CCAssignFn *varArgFn() const;
+
+ const uint16_t *shadowRegs() const;
+
void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
unsigned Align);
+ /// Return the type of the register which is used to pass an argument or
+ /// return a value. This function returns f64 if the argument is an i64
+ /// value which has been generated as a result of softening an f128 value.
+ /// Otherwise, it just returns VT.
+ MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
+ bool IsSoftFloat) const;
+
+ template<typename Ty>
+ void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const;
+
CCState &CCInfo;
- bool UseRegsForByval;
- unsigned RegSize;
- unsigned NumIntArgRegs;
- unsigned ReservedArgArea;
- const uint16_t *IntArgRegs, *ShadowRegs;
+ CallingConv::ID CallConv;
+ bool IsO32;
SmallVector<ByValArgInfo, 2> ByValArgs;
- llvm::CCAssignFn *FixedFn, *VarFn;
};
// Subtarget Info
@@ -241,44 +312,49 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode, const Type *RetTy) const;
// Lower Operand specifics
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -292,7 +368,7 @@ namespace llvm {
/// passByValArg - Pass a byval argument in registers or on stack.
void passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
@@ -332,10 +408,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -357,10 +429,13 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
@@ -371,18 +446,20 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
- MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
bool Nand = false) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
}
#endif // MipsISELLOWERING_H
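The create() hook and the two factory functions declared near the end of this header split the lowering into Mips16 and standard-encoding variants chosen at construction time. The toy model below shows only the dispatch shape; the boolean flag stands in for whatever subtarget query the real implementation uses, which this patch does not show:

#include <cstdio>

struct Lowering {
  virtual ~Lowering() {}
  virtual const char *name() const = 0;
};
struct Mips16Lowering : Lowering { const char *name() const { return "Mips16"; } };
struct MipsSELowering : Lowering { const char *name() const { return "MipsSE"; } };

// Analogous to MipsTargetLowering::create dispatching to
// createMips16TargetLowering / createMipsSETargetLowering.
static Lowering *createLowering(bool InMips16Mode) {
  if (InMips16Mode)
    return new Mips16Lowering;
  return new MipsSELowering;
}

int main() {
  Lowering *L = createLowering(false);
  std::printf("%s\n", L->name()); // prints MipsSE
  delete L;
  return 0;
}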
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 33ee02068946..6b23057c9cdb 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -86,272 +86,320 @@ def fpimm0neg : PatLeaf<(fpimm), [{
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-// FP load.
-let DecoderMethod = "DecodeFMem" in {
-class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs RC:$ft), (ins MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load addr:$addr))],
- IILoad>;
-
-// FP store.
-class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
- IIStore>;
-}
-// FP indexed load.
-class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp = null_frag>:
- FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
- let fs = 0;
-}
-
-// FP indexed store.
-class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp= null_frag>:
- FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
- let fd = 0;
-}
-
-// Instructions that convert an FP value to 32-bit fixed point.
-multiclass FFR1_W_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
- def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+class ADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> {
+ let isCommutable = IsComm;
+}
+
+multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode = null_frag> {
+ def _D32 : ADDS_FT<opstr, AFGR64, Itin, IsComm, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ADDS_FT<opstr, FGR64, Itin, IsComm, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
}
}
-// Instructions that convert an FP value to 64-bit fixed point.
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in
-multiclass FFR1_L_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
- def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
-}
+class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>,
+ NeverHasSideEffects;
-// FP-to-FP conversion instructions.
-multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
- def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
- def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+multiclass ABSS_M<string opstr, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> {
+ def _D32 : ABSS_FT<opstr, AFGR64, AFGR64, Itin, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR64, FGR64, Itin, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
}
}
-multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
- let isCommutable = isComm in {
- def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
- def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
+multiclass ROUND_M<string opstr, InstrItinClass Itin> {
+ def _D32 : ABSS_FT<opstr, FGR32, AFGR64, Itin>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR32, FGR64, Itin>,
+ Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-}
-// FP madd/msub/nmadd/nmsub instruction classes.
-class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>;
-
-class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>;
+class MFC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>;
+
+class NMADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
+ Itin, FrmFR>;
+
+class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
+ [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
+ [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class BC1F_FT<string opstr, InstrItinClass Itin,
+ SDPatternOperator Op = null_frag> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(MipsFPBrcond Op, bb:$offset)], Itin, FrmFI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+ let Uses = [FCR31];
+}
+
+class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond),
+ !strconcat("c.$cond.", typestr, "\t$fs, $ft"),
+ [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> {
+ let Defs = [FCR31];
+}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-defm ROUND_W : FFR1_W_M<0xc, "round">;
-defm ROUND_L : FFR1_L_M<0x8, "round">;
-defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
-defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
-defm CEIL_W : FFR1_W_M<0xe, "ceil">;
-defm CEIL_L : FFR1_L_M<0xa, "ceil">;
-defm FLOOR_W : FFR1_W_M<0xf, "floor">;
-defm FLOOR_L : FFR1_L_M<0xb, "floor">;
-defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects;
-//defm CVT_L : FFR1_L_M<0x25, "cvt">;
-
-def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects;
-def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects;
-def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects;
-
-let Predicates = [NotFP64bit, HasStandardEncoding], neverHasSideEffects = 1 in {
- def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
- def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
- def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
-}
-
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64",
- neverHasSideEffects = 1 in {
- def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
- def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
- def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
- def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
- def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
-}
-
-let Predicates = [NoNaNsFPMath, HasStandardEncoding] in {
- defm FABS : FFR1P_M<0x5, "abs", fabs>;
- defm FNEG : FFR1P_M<0x7, "neg", fneg>;
-}
-defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>;
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>;
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>;
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>;
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>;
+
+defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
+defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>;
+defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>;
+defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
+defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>;
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x8, 17>;
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>;
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x9, 17>;
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>;
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>;
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>;
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0xb, 17>;
+}
+
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>;
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>;
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>;
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+}
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>;
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>;
+}
+
+let Predicates = [NoNaNsFPMath, HasStdEnc] in {
+ def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>;
+ def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>;
+ defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
+ defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
+}
+
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>,
+ ABSS_FM<0x4, 16>;
+defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
// When defining instructions, we reference all 32-bit registers,
// regardless of register aliasing.
-class FFRGPR<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern>:
- FFR<0x11, 0x0, _fmt, outs, ins, asmstr, pattern> {
- bits<5> rt;
- let ft = rt;
- let fd = 0;
-}
-
/// Move Control Registers From/To CPU Registers
-def CFC1 : FFRGPR<0x2, (outs CPURegs:$rt), (ins CCR:$fs),
- "cfc1\t$rt, $fs", []>;
-
-def CTC1 : FFRGPR<0x6, (outs CCR:$fs), (ins CPURegs:$rt),
- "ctc1\t$rt, $fs", []>;
-
-def MFC1 : FFRGPR<0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
- "mfc1\t$rt, $fs",
- [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
-
-def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
- "mtc1\t$rt, $fs",
- [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
-
-def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs),
- "dmfc1\t$rt, $fs",
- [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>;
-
-def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
- "dmtc1\t$rt, $fs",
- [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>;
-
-def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
-def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
-def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
+def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>;
+def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>;
+def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>;
+def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>;
+def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>;
+def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>;
+
+def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>;
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in {
- def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
- def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
- def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> {
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>;
+ def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>;
+ def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> {
let isCodeGenOnly =1;
}
- def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> {
+ def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> {
let isCodeGenOnly =1;
}
}
-let Predicates = [NotN64, HasStandardEncoding] in {
- def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
- def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
+let Predicates = [NotN64, HasStdEnc] in {
+ def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>;
}
-let Predicates = [NotN64, HasMips64, HasStandardEncoding],
+let Predicates = [NotN64, HasMips64, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
- def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
+ def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
- def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
- def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
+let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
// Indexed loads and stores.
-let Predicates = [HasMips32r2Or64, HasStandardEncoding] in {
- def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>;
- def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>;
+let Predicates = [HasFPIdx, HasStdEnc] in {
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>;
}
-let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in {
- def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>;
- def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>;
+let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
-let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in {
- def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>;
- def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>;
+let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
// n64
-let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in {
- def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>;
- def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>;
- def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>;
- def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store>;
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in {
+ def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>;
+ def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>,
+ LWXC1_FM<1>;
+ def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>,
+ SWXC1_FM<8>;
+ def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>,
+ SWXC1_FM<9>;
}
// Load/store doubleword indexed unaligned.
-let Predicates = [NotMips64, HasStandardEncoding] in {
- def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+let Predicates = [NotMips64, HasStdEnc] in {
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
-let Predicates = [HasMips64, HasStandardEncoding],
+let Predicates = [HasMips64, HasStdEnc],
DecoderNamespace="Mips64" in {
- def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
- def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
/// Floating-point Arithmetic
-defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
-defm FDIV : FFR2P_M<0x03, "div", fdiv>;
-defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
-defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>;
+defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
+def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>;
+defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
+def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>;
+defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
+def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>;
+defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
- def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
- def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>;
+ def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>;
}
-let Predicates = [HasMips32r2, NoNaNsFPMath, HasStandardEncoding] in {
- def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
- def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
+let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>;
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>;
}
-let Predicates = [HasMips32r2, NotFP64bit, HasStandardEncoding] in {
- def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
- def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
+let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
-let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStandardEncoding] in {
- def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
- def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
-let Predicates = [HasMips32r2, IsFP64bit, HasStandardEncoding], isCodeGenOnly=1 in {
- def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
- def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
+let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
-let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
isCodeGenOnly=1 in {
- def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
- def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
//===----------------------------------------------------------------------===//
@@ -362,19 +410,9 @@ let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-/// Floating Point Branch of False/True (Likely)
-let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
- class FBRANCH<bits<1> nd, bits<1> tf, PatLeaf op, string asmstr> :
- FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
- [(MipsFPBrcond op, bb:$dst)]> {
- let Inst{20-18} = 0;
- let Inst{17} = nd;
- let Inst{16} = tf;
-}
-
let DecoderMethod = "DecodeBC1" in {
-def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">;
-def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">;
+def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>;
+def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>;
}
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -398,33 +436,24 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
-class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
- FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc),
- !strconcat("c.$cc.", typestr, "\t$fs, $ft"),
- [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>;
-
/// Floating Point Compare
-let Defs=[FCR31] in {
- def FCMP_S32 : FCMP<0x10, FGR32, "s">;
- def FCMP_D32 : FCMP<0x11, AFGR64, "d">,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def FCMP_D64 : FCMP<0x11, FGR64, "d">,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- }
-}
+def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+let DecoderNamespace = "Mips64" in
+def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
PseudoSE<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
+ (ins CPURegs:$lo, CPURegs:$hi),
[(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
@@ -432,7 +461,7 @@ def BuildPairF64 :
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n),
[(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
//===----------------------------------------------------------------------===//
@@ -444,7 +473,7 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
(CVT_D32_W (MTC1 CPURegs:$src))>;
def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
@@ -453,7 +482,7 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in {
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
@@ -473,3 +502,28 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in {
def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
+
+// Patterns for loads/stores with a reg+imm operand.
+let AddedComplexity = 40 in {
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1_P8, f32, load>;
+ def : StoreRegImmPat<SWC1_P8, f32>;
+ def : LoadRegImmPat<LDC164_P8, f64, load>;
+ def : StoreRegImmPat<SDC164_P8, f64>;
+ }
+
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1, f32, load>;
+ def : StoreRegImmPat<SWC1, f32>;
+ }
+
+ let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC164, f64, load>;
+ def : StoreRegImmPat<SDC164, f64>;
+ }
+
+ let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
+ }
+}
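On the encoding side of this refactor, the *_FT classes above carry only assembly strings and selection patterns, while the *_FM mixins defined in MipsInstrFormats.td (further down in this patch) contribute the bit layout. As a worked example of the ADDS_FM layout, this self-contained encoder produces the word for add.s $f0, $f1, $f2 (fmt 16 = single precision, funct 0 = add, register numbers as plain integers):

#include <cassert>
#include <cstdint>
#include <cstdio>

// FR-format FP arithmetic per ADDS_FM:
// |31-26 op=0x11|25-21 fmt|20-16 ft|15-11 fs|10-6 fd|5-0 funct|
uint32_t encodeADDS(uint32_t fmt, uint32_t ft, uint32_t fs, uint32_t fd,
                    uint32_t funct) {
  assert(fmt < 32 && ft < 32 && fs < 32 && fd < 32 && funct < 64);
  return (0x11u << 26) | (fmt << 21) | (ft << 16) | (fs << 11) |
         (fd << 6) | funct;
}

int main() {
  // add.s $f0, $f1, $f2 -> fd=0, fs=1, ft=2
  std::printf("0x%08x\n", encodeADDS(16, 2, 1, 0, 0)); // prints 0x46020800
  return 0;
}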
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1ecbdc2474b3..ee432c875355 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -76,20 +76,22 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin, Format f>:
MipsInst<outs, ins, asmstr, pattern, itin, f> {
- let Predicates = [HasStandardEncoding];
+ let Predicates = [HasStdEnc];
}
// Mips Pseudo Instructions Format
-class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+class MipsPseudo<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo> :
+ MipsInst<outs, ins, "", pattern, itin, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
// Mips32/64 Pseudo Instruction Format
-class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsPseudo<outs, ins, asmstr, pattern> {
- let Predicates = [HasStandardEncoding];
+class PseudoSE<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasStdEnc];
}
// Pseudo-instructions for alternate assembly syntax (never used by codegen).
@@ -161,30 +163,28 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
+class FJ<bits<6> op>
{
- bits<26> addr;
+ bits<26> target;
- let Opcode = op;
+ bits<32> Inst;
- let Inst{25-0} = addr;
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
}
- //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|>
//===----------------------------------------------------------------------===//
-class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
- InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>
+class MFC3OP_FM<bits<6> op, bits<5> mfmt>
{
- bits<5> mfmt;
bits<5> rt;
bits<5> rd;
bits<3> sel;
- let Opcode = op;
- let mfmt = _mfmt;
+ bits<32> Inst;
+ let Inst{31-26} = op;
let Inst{25-21} = mfmt;
let Inst{20-16} = rt;
let Inst{15-11} = rd;
@@ -192,6 +192,270 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
let Inst{2-0} = sel;
}
+class ADD_FM<bits<6> op, bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class ADDI_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class SRA_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-22} = 0;
+ let Inst{21} = rotate;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+class SRLV_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-7} = 0;
+ let Inst{6} = rotate;
+ let Inst{5-0} = funct;
+}
+
+class BEQ_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM<bits<6> op, bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class B_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 4;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = offset;
+}
+
+class SLTI_FM<bits<6> op> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class MFLO_FM<bits<6> funct> {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class MTLO_FM<bits<6> funct> {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class SEB_FM<bits<5> funct, bits<6> funct2> {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = funct;
+ let Inst{5-0} = funct2;
+}
+
+class CLO_FM<bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1c;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+ let rt = rd;
+}
+
+class LUI_FM {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0xf;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class JALR_FM {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 9;
+}
+
+class BAL_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0x11;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class SYNC_FM {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{10-6} = stype;
+ let Inst{5-0} = 0xf;
+}
+
+class MULT_FM<bits<6> op, bits<6> funct> {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class EXT_FM<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class RDHWR_FM {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0x3b;
+}
+
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
@@ -206,31 +470,6 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
-//===----------------------------------------------------------------------===//
-
-class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
-{
- bits<5> fd;
- bits<5> fs;
- bits<5> ft;
- bits<5> fmt;
- bits<6> funct;
-
- let Opcode = op;
- let funct = _funct;
- let fmt = _fmt;
-
- let Inst{25-21} = fmt;
- let Inst{20-16} = ft;
- let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = funct;
-}
-
-//===----------------------------------------------------------------------===//
// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
//===----------------------------------------------------------------------===//
@@ -248,130 +487,179 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
let Inst{15-0} = imm16;
}
-//===----------------------------------------------------------------------===//
-// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
-//===----------------------------------------------------------------------===//
-
-class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fs;
- bits<5> ft;
- bits<4> cc;
- bits<5> fmt;
+class ADDS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
- let Opcode = 0x11;
- let fmt = _fmt;
+ bits<32> Inst;
+ let Inst{31-26} = 0x11;
let Inst{25-21} = fmt;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = 0;
- let Inst{5-4} = 0b11;
- let Inst{3-0} = cc;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
+class ABSS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
-class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> rd;
- bits<5> rs;
- bits<3> cc;
- bits<1> tf;
-
- let Opcode = 0;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = rs;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
- let Inst{15-11} = rd;
- let Inst{10-6} = 0;
- let Inst{5-0} = 1;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
-class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fd;
- bits<5> fs;
- bits<3> cc;
- bits<5> fmt;
- bits<1> tf;
+class MFC1_FM<bits<5> funct> {
+ bits<5> rt;
+ bits<5> fs;
- let Opcode = 17;
- let fmt = _fmt;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = fmt;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rt;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = 17;
+ let Inst{10-0} = 0;
}
-// FP unary instructions without patterns.
-class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
- let ft = 0;
-}
+class LW_FM<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
-// FP unary instructions with patterns.
-class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
- [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
- let ft = 0;
-}
+ bits<32> Inst;
-class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
- string fmtstr, RegisterClass RC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
- [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
+ let Inst{31-26} = op;
+ let Inst{25-21} = addr{20-16};
+ let Inst{20-16} = rt;
+ let Inst{15-0} = addr{15-0};
+}
-// Floating point madd/msub/nmadd/nmsub.
-class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+class MADDS_FM<bits<3> funct, bits<3> fmt> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
bits<5> ft;
- let Opcode = 0x13;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = fr;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-3} = funct;
- let Inst{2-0} = fmt;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
}
-// FP indexed load/store instructions.
-class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> base;
- bits<5> index;
- bits<5> fs;
- bits<5> fd;
+class LWXC1_FM<bits<6> funct> {
+ bits<5> fd;
+ bits<5> base;
+ bits<5> index;
- let Opcode = 0x13;
+ bits<32> Inst;
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class SWXC1_FM<bits<6> funct> {
+ bits<5> fs;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = base;
let Inst{20-16} = index;
let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class BC1F_FM<bit nd, bit tf> {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = 0x8;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = nd;
+ let Inst{16} = tf;
+ let Inst{15-0} = offset;
+}
+
+class CEQS_FM<bits<5> fmt> {
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cond;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-8} = 0; // cc
+ let Inst{7-4} = 0x3;
+ let Inst{3-0} = cond;
+}
+
+class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = funct;
}
+
+class CMov_F_I_FM<bit tf> {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class CMov_F_F_FM<bits<5> fmt, bit tf> {
+ bits<5> fd;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 0x11;
+}
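Most of the format classes above map one operand to one field; LW_FM is the exception, splitting its 21-bit addr operand into the base register (addr{20-16}) and a 16-bit offset (addr{15-0}). A self-contained sketch of that packing, worked for lwc1 $f0, 8($sp):

#include <cstdint>
#include <cstdio>

// I-format load/store per LW_FM: addr{20-16} is the base register,
// addr{15-0} the immediate offset.
uint32_t encodeLW(uint32_t op, uint32_t rt, uint32_t addr) {
  uint32_t base   = (addr >> 16) & 0x1f; // addr{20-16}
  uint32_t offset = addr & 0xffff;       // addr{15-0}
  return (op << 26) | (base << 21) | (rt << 16) | offset;
}

int main() {
  // lwc1 $f0, 8($sp): op=0x31, rt=0 ($f0), base=29 ($sp), offset=8
  uint32_t addr = (29u << 16) | 8u;
  std::printf("0x%08x\n", encodeLW(0x31, 0, addr)); // prints 0xc7a00008
  return 0;
}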
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index ca80d43f36f1..ad92d41209e9 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -11,16 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "MipsGenInstrInfo.inc"
@@ -93,81 +93,11 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
-
- MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
-
- // Skip all the debug instructions.
- while (I != REnd && I->isDebugValue())
- ++I;
-
- if (I == REnd || !isUnpredicatedTerminator(&*I)) {
- // If this block ends with no branches (it just falls through to its succ)
- // just return false, leaving TBB/FBB null.
- TBB = FBB = NULL;
- return false;
- }
-
- MachineInstr *LastInst = &*I;
- unsigned LastOpc = LastInst->getOpcode();
-
- // Not an analyzable branch (must be an indirect jump).
- if (!GetAnalyzableBrOpc(LastOpc))
- return true;
-
- // Get the second to last instruction in the block.
- unsigned SecondLastOpc = 0;
- MachineInstr *SecondLastInst = NULL;
-
- if (++I != REnd) {
- SecondLastInst = &*I;
- SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
-
- // Not an analyzable branch (must be an indirect jump).
- if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
- return true;
- }
-
- // If there is only one terminator instruction, process it.
- if (!SecondLastOpc) {
- // Unconditional branch
- if (LastOpc == UncondBrOpc) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // Conditional branch
- AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
- return false;
- }
-
- // If we reached here, there are two branches.
- // If there are three terminators, we don't know what sort of block this is.
- if (++I != REnd && isUnpredicatedTerminator(&*I))
- return true;
-
- // If second to last instruction is an unconditional branch,
- // analyze it and remove the last instruction.
- if (SecondLastOpc == UncondBrOpc) {
- // Return if the last instruction cannot be removed.
- if (!AllowModify)
- return true;
-
- TBB = SecondLastInst->getOperand(0).getMBB();
- LastInst->eraseFromParent();
- return false;
- }
-
- // Conditional branch followed by an unconditional branch.
- // The last one must be unconditional.
- if (LastOpc != UncondBrOpc)
- return true;
-
- AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
- FBB = LastInst->getOperand(0).getMBB();
+ bool AllowModify) const {
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ BranchType BT = AnalyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
- return false;
+ return (BT == BT_None) || (BT == BT_Indirect);
}
void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
@@ -256,6 +186,90 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
+MipsInstrInfo::BranchType MipsInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const {
+
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // This block ends with no branches (it just falls through to its succ).
+ // Leave TBB/FBB null.
+ TBB = FBB = NULL;
+ return BT_NoBranch;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ BranchInstrs.push_back(LastInst);
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+ // Get the second-to-last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return BT_None;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == UncondBrOpc) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return BT_Uncond;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return BT_Cond;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return BT_None;
+
+ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+ // If the second-to-last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == UncondBrOpc) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return BT_None;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ BranchInstrs.pop_back();
+ return BT_Uncond;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != UncondBrOpc)
+ return BT_None;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return BT_CondUncond;
+}
+
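+// A minimal sketch (not exercised by this patch) of how a client pass
+// could consume the extended interface; TII and MBB are assumed to be a
+// MipsInstrInfo reference and a basic block already in scope:
+//
+//   MachineBasicBlock *TBB = NULL, *FBB = NULL;
+//   SmallVector<MachineOperand, 4> Cond;
+//   SmallVector<MachineInstr*, 2> BranchInstrs;
+//   MipsInstrInfo::BranchType BT =
+//       TII.AnalyzeBranch(MBB, TBB, FBB, Cond, false, BranchInstrs);
+//   if (BT == MipsInstrInfo::BT_CondUncond) {
+//     // BranchInstrs[0] is the conditional branch, BranchInstrs[1] the
+//     // unconditional branch that follows it.
+//   }
+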
/// Return the number of bytes of code the specified instruction may be.
unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index aca2bc7ae98d..8c05d97beac2 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -31,6 +31,15 @@ protected:
unsigned UncondBrOpc;
public:
+ enum BranchType {
+ BT_None, // Couldn't analyze branch.
+ BT_NoBranch, // No branches found.
+ BT_Uncond, // One unconditional branch.
+ BT_Cond, // One conditional branch.
+ BT_CondUncond, // A conditional branch followed by an unconditional branch.
+ BT_Indirect // One indirect branch.
+ };
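+
+ // BT_None and BT_Indirect are the two cases that the boolean
+ // AnalyzeBranch wrapper reports as "true", i.e. branches it could not
+ // analyze.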
+
explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
static const MipsInstrInfo *create(MipsTargetMachine &TM);
@@ -51,6 +60,12 @@ public:
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const;
+
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
@@ -71,6 +86,36 @@ public:
/// Return the number of bytes of code the specified instruction may be.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
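+ // A rough sketch, assuming a subtarget subclass (e.g. MipsSEInstrInfo)
+ // implements the pure virtual hooks and folds the extra Offset into the
+ // frame-index operand (opcode selection per register class elided):
+ //
+ //   void MipsSEInstrInfo::
+ //   storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ //                   unsigned SrcReg, bool isKill, int FI,
+ //                   const TargetRegisterClass *RC,
+ //                   const TargetRegisterInfo *TRI, int64_t Offset) const {
+ //     BuildMI(MBB, I, DebugLoc(), get(Mips::SW))
+ //       .addReg(SrcReg, getKillRegState(isKill))
+ //       .addFrameIndex(FI).addImm(Offset);
+ //   }
+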
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index aa8881997285..3a82e8171301 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,18 +23,19 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
- [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisSameAs<2, 3>]>;
-def SDT_MipsDivRem : SDTypeProfile<0, 2,
- [SDTCisInt<0>,
- SDTCisSameAs<0, 1>]>;
+def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
+ [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
-def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>,
- SDTCisSameAs<0, 1>]>;
def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
@@ -74,7 +75,8 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
// Return
-def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
@@ -83,20 +85,27 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPSideEffect,
SDNPOptInGlue, SDNPOutGlue]>;
+// Node used to extract an integer from the LO/HI register pair.
+def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+
+// Node used to insert 32-bit integers into the LO/HI register pair.
+def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+
+// Mult nodes.
+def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
+def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>;
+
// MAdd*/MSub* nodes
-def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>;
// DivRem(u) nodes
-def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
- [SDNPOutGlue]>;
-def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>;
+def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>;
+def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
[SDNPOutGlue]>;
// Target constant nodes that are not part of any isel patterns and remain
@@ -111,10 +120,6 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
-// Pointer to dynamically allocated stack area.
-def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
- [SDNPHasChain, SDNPInGlue]>;
-
def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
@@ -148,14 +153,14 @@ def HasSwap : Predicate<"Subtarget.hasSwap()">,
AssemblerPredicate<"FeatureSwap">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">,
AssemblerPredicate<"FeatureCondMov">;
+def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">,
+ AssemblerPredicate<"FeatureFPIdx">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">,
- AssemblerPredicate<"FeatureMips32r2,FeatureMips64">;
def NotMips64 : Predicate<"!Subtarget.hasMips64()">,
AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
@@ -172,11 +177,15 @@ def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
AssemblerPredicate<"FeatureMips32">;
-def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">,
- AssemblerPredicate<"!FeatureMips16">;
+def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
+ AssemblerPredicate<"!FeatureMips16">;
class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
- let Predicates = [HasStandardEncoding];
+ let Predicates = [HasStdEnc];
+}
+
+class IsCommutable {
+ bit isCommutable = 1;
}
class IsBranch {
@@ -234,6 +243,10 @@ def calltarget64: Operand<i64>;
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
+
+def simm20 : Operand<i32> {
+}
+
def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
@@ -253,6 +266,7 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem64 : Operand<i64> {
@@ -260,18 +274,21 @@ def mem64 : Operand<i64> {
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea_64 : Operand<i64> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
// size operand of ext instruction
@@ -296,10 +313,21 @@ def HI16 : SDNodeXForm<imm, [{
return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
+// Transformation function: add 1 to the node's immediate.
+def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>;
+
+// Node immediate fits as 8-bit sign extended on target immediate.
+def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+// Node immediate fits as 15-bit sign extended on target immediate.
+def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>;
+
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
// immediate are caught.
@@ -320,113 +348,84 @@ def immLow16Zero : PatLeaf<(imm), [{
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+// True if (N + 1) fits in a 16-bit field.
+def immSExt16Plus1 : PatLeaf<(imm), [{
+ return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
+}]>;
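+
+// Illustrative use of Plus1 with immSExt16Plus1 (an assumed companion
+// pattern, not defined in this file): a comparison (setgt $lhs, C) can be
+// selected as "not ($lhs < C + 1)" while C + 1 still fits in 16 bits:
+//   def : MipsPat<(setgt CPURegs:$lhs, immSExt16Plus1:$rhs),
+//                 (XORi (SLTi CPURegs:$lhs, (Plus1 imm:$rhs)), 1)>;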
+
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions that access the stack use it.
def addr :
- ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrRegImm :
+ ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrDefault :
+ ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<0x10, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC0_3OP : MFC3OP<0x10, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc0\t$rt, $rd, $sel">;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
-def MFC2_3OP : MFC3OP<0x12, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC2_3OP : MFC3OP<0x12, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc2\t$rt, $rd, $sel">;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
// Arithmetic and logical instructions with 3 register operands.
-class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
- let shamt = 0;
+class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
let isCommutable = isComm;
let isReMaterializable = 1;
-}
-
-class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
- let shamt = 0;
- let isCommutable = isComm;
+ string BaseOpcode;
+ string Arch;
}
// Arithmetic and logical instructions with 2 register operands.
-class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> {
+class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
+ SDPatternOperator imm_type = null_frag,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
let isReMaterializable = 1;
}
-class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"), [], IIAlu>;
-
// Arithmetic Multiply ADD/SUB
-let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
-class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
- FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
- let rd = 0;
- let shamt = 0;
+class MArithR<string opstr, bit isComm = 0> :
+ InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> {
+ let Defs = [HI, LO];
+ let Uses = [HI, LO];
let isCommutable = isComm;
}
// Logical
-class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
- let shamt = 0;
+class LogicNOR<string opstr, RegisterOperand RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
let isCommutable = 1;
}
// Shifts
-class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, PatFrag PF, Operand ImmOpnd,
- RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
- !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> {
- let rs = isRotate;
-}
-
-// 32-bit shift instructions.
-class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>;
-
-class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> {
- let shamt = isRotate;
-}
+class shift_rotate_imm<string opstr, Operand ImmOpnd,
+ RegisterOperand RC, SDPatternOperator OpNode = null_frag,
+ SDPatternOperator PF = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"),
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+
+class shift_rotate_reg<string opstr, RegisterOperand RC,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rt, $rs"),
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
// Load Upper Immediate
-class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>:
- FI<op, (outs RC:$rt), (ins Imm:$imm16),
- !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove {
- let rs = 0;
+class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
+ InstSE<(outs RC:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+ [], IIAlu, FrmI>, IsAsCheapAsAMove {
let neverHasSideEffects = 1;
let isReMaterializable = 1;
}
@@ -440,66 +439,40 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
}
// Memory Load/Store
-let canFoldAsLoad = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
- let isPseudo = Pseudo;
-}
-
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(OpNode RC:$rt, addr:$addr)], IIStore> {
- let isPseudo = Pseudo;
-}
-
-// 32-bit load.
-multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
}
-// 64-bit load.
-multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
}
-// 32-bit store.
-multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass LoadM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
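+
+// Each defm therefore yields two records: "defm LW", for example, produces
+// LW for 32-bit pointers (NotN64) and LW_P8 for 64-bit pointers under the
+// N64 ABI.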
-// 64-bit store.
-multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass StoreM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -507,81 +480,58 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
// Load/Store Left/Right
let canFoldAsLoad = 1 in
-class LoadLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd> :
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr, RC:$src))], IILoad> {
+class LoadLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr, RC:$src))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
string Constraints = "$src = $rt";
}
-class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"), [(OpNode RC:$rt, addr:$addr)],
- IIStore>;
-
-// 32-bit load left/right.
-multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-// 64-bit load left/right.
-multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class StoreLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
}
-// 32-bit store left/right.
-multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : LoadLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : LoadLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 64-bit store left/right.
-multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : StoreLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : StoreLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
// Conditional Branch
-class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+class CBranch<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch,
+ FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
let Defs = [AT];
}
-class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
- let rt = _rt;
+class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -589,27 +539,23 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
}
// SetCC
-class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
- IIAlu> {
- let shamt = 0;
-}
+class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
-class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
- PatLeaf imm_type, RegisterClass RC>:
- FI<op, (outs CPURegs:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
- IIAlu>;
+class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
+ RegisterClass RC>:
+ InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
+ IIAlu, FrmI>;
// Jump
-class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
- SDPatternOperator operator, SDPatternOperator targetoperator>:
- FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"),
- [(operator targetoperator:$target)], IIBranch> {
+class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
+ SDPatternOperator targetoperator> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(operator targetoperator:$target)], IIBranch, FrmJ> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -618,27 +564,21 @@ class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
}
// Unconditional branch
-class UncondBranch<bits<6> op, string instr_asm>:
- BranchBase<op, (outs), (ins brtarget:$imm16),
- !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
- let rs = 0;
- let rt = 0;
+class UncondBranch<string opstr> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(br bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
- let Predicates = [RelocPIC, HasStandardEncoding];
+ let Predicates = [RelocPIC, HasStdEnc];
let Defs = [AT];
}
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>:
- FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
-}
+ InstSE<(outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch, FrmR>;
// Indirect branch
class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> {
@@ -656,465 +596,523 @@ class RetBase<RegisterClass RC>: JumpFR<RC> {
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
- class JumpLink<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins calltarget:$target),
- !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
- IIBranch> {
- let DecoderMethod = "DecodeJumpTarget";
- }
-
- class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
- RegisterClass RC>:
- FR<op, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 31;
- let shamt = 0;
+ class JumpLink<string opstr> :
+ InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ let DecoderMethod = "DecodeJumpTarget";
}
- class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
- FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
- let rt = _rt;
- }
+ class JumpLinkRegPseudo<RegisterClass RC, Instruction JALRInst,
+ Register RetReg>:
+ PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>,
+ PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>;
+
+ class JumpLinkReg<string opstr, RegisterClass RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
+
+ class BGEZAL_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+
+}
+
+class BAL_FT :
+ InstSE<(outs), (ins brtarget:$offset), "bal\t$offset", [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
}
+// Sync
+let hasSideEffects = 1 in
+class SYNC_FT :
+ InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)],
+ NoItinerary, FrmOther>;
+
// Mul, Div
-class Mult<bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
- let rd = 0;
- let shamt = 0;
+class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
+ itin, FrmR> {
let isCommutable = 1;
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPURegs, [HI, LO]>;
-
-class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op RC:$rs, RC:$rt)], itin> {
- let rd = 0;
- let shamt = 0;
+// Pseudo multiply/divide instruction with explicit accumulator register
+// operands.
+class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ bit IsComm = 1, bit HasSideEffects = 0> :
+ PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt),
+ [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>,
+ PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> {
+ let isCommutable = IsComm;
+ let hasSideEffects = HasSideEffects;
+}
+
+// Pseudo multiply add/sub instruction with explicit accumulator register
+// operands.
+class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
+ : PseudoSE<(outs ACRegs:$ac),
+ (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin),
+ [(set ACRegs:$ac,
+ (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))],
+ IIImul>,
+ PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> {
+ string Constraints = "$acin = $ac";
+}
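+
+// The isel patterns above create these pseudos; the PseudoInstExpansion
+// hook records how each one is rewritten to its real instruction (the
+// explicit $ac operand is dropped in favor of the implicit HI/LO defs)
+// when the machine instruction is finally lowered for emission.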
+
+class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
+ [], itin, FrmR> {
let Defs = DefRegs;
}
-class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>;
-
// Move from Hi/Lo
-class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> UseRegs>:
- FR<0x00, func, (outs RC:$rd), (ins),
- !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
- let rs = 0;
- let rt = 0;
- let shamt = 0;
+class MoveFromLOHI<string opstr, RegisterClass RC, list<Register> UseRegs>:
+ InstSE<(outs RC:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
let Uses = UseRegs;
let neverHasSideEffects = 1;
}
-class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
+class MoveToLOHI<string opstr, RegisterClass RC, list<Register> DefRegs>:
+ InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<opc, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
- let isCodeGenOnly = 1;
+class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> :
+ InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let isCodeGenOnly = 1;
+ let DecoderMethod = "DecodeMem";
}
// Count Leading Ones/Zeros in Word
-class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
- Requires<[HasBitCount, HasStandardEncoding]> {
- let shamt = 0;
- let rt = rd;
-}
+class CountLeading0<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
+
+class CountLeading1<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
-class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
- Requires<[HasBitCount, HasStandardEncoding]> {
- let shamt = 0;
- let rt = rd;
-}
// Sign Extend in Register.
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
- RegisterClass RC>:
- FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"),
- [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
- let rs = 0;
- let shamt = sa;
- let Predicates = [HasSEInReg, HasStandardEncoding];
+class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> :
+ InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"),
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> {
+ let Predicates = [HasSEInReg, HasStdEnc];
}
// Subword Swap
-class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
- FR<0x1f, func, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
- let rs = 0;
- let shamt = sa;
- let Predicates = [HasSwap, HasStandardEncoding];
+class SubwordSwap<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
+ NoItinerary, FrmR> {
+ let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
- : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
- let rs = 0;
- let shamt = 0;
-}
+class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> :
+ InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
+ IIAlu, FrmR>;
// Ext and Ins
-class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
- let Predicates = [HasMips32r2, HasStandardEncoding];
-}
-
-class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt),
- (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
- NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
- let Predicates = [HasMips32r2, HasStandardEncoding];
+class ExtBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
+}
+
+class InsBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
- RegisterClass PRC> :
+class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
-multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass Atomic2Ops32<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPURegs, CPURegs>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
- RegisterClass PRC> :
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
-multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass AtomicCmpSwap32<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$rt), (ins Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IILoad> {
+class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IIStore> {
+class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayStore = 1;
let Constraints = "$rt = $dst";
}
+class MFC3OP<dag outs, dag ins, string asmstr> :
+ InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
// Return RA.
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
+def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
- "!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-let neverHasSideEffects = 1 in
-def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
-
let usesCustomInserter = 1 in {
- defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
- defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">;
- defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">;
- defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">;
- defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">;
- defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">;
- defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">;
- defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">;
- defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">;
- defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">;
- defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">;
- defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">;
- defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">;
- defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">;
- defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">;
- defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">;
- defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">;
- defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">;
-
- defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">;
- defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">;
- defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">;
-
- defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">;
- defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">;
- defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">;
-}
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>;
+ defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
+}
+
+def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
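+
+// These pseudos keep a 64-bit accumulator value as a single unit through
+// register allocation; a later target-specific expansion presumably
+// splits them into HI/LO moves plus ordinary 32-bit loads and stores.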
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
-
-class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>;
-
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>;
-
-class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>;
-
//===----------------------------------------------------------------------===//
// MipsI Instructions
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>,
- IsAsCheapAsAMove;
-def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
-def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
-def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
-def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
-def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
-def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
-def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>;
+def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+ ADDI_FM<0x9>, IsAsCheapAsAMove;
+def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
+def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
+def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
-def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
-def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
-def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
-def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
-def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
-def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
-def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
-def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
-def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
+def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
+def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
+def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
+def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>;
-def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>;
-def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>;
-def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>;
-def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
-def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
+def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+ SRA_FM<0, 0>;
+def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+ SRA_FM<2, 0>;
+def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+ SRA_FM<3, 0>;
+def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
- def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
- def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+ SRA_FM<2, 1>;
+ def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
}
/// Load and Store Instructions
/// aligned
-defm LB : LoadM32<0x20, "lb", sextloadi8>;
-defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
-defm LH : LoadM32<0x21, "lh", sextloadi16>;
-defm LHu : LoadM32<0x25, "lhu", zextloadi16>;
-defm LW : LoadM32<0x23, "lw", load>;
-defm SB : StoreM32<0x28, "sb", truncstorei8>;
-defm SH : StoreM32<0x29, "sh", truncstorei16>;
-defm SW : StoreM32<0x2b, "sw", store>;
+defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
+defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
+defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
+defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
+defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
/// load/store left/right
-defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>;
-defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>;
-defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
-defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
+defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
+defm LWR : LoadLeftRightM<"lwr", MipsLWR, CPURegs>, LW_FM<0x26>;
+defm SWL : StoreLeftRightM<"swl", MipsSWL, CPURegs>, LW_FM<0x2a>;
+defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>;
-let hasSideEffects = 1 in
-def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
-{
- bits<5> stype;
- let Opcode = 0;
- let Inst{25-11} = 0;
- let Inst{10-6} = stype;
- let Inst{5-0} = 15;
-}
+def SYNC : SYNC_FT, SYNC_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<0x30, "ll", CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [NotN64, HasStdEnc] in {
+ def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>;
+ def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>;
}
-def SC : SCBase<0x38, "sc", CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>;
+ def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>;
}
/// Jump and Branch Instructions
-def J : JumpFJ<0x02, jmptarget, "j", br, bb>,
- Requires<[RelocStatic, HasStandardEncoding]>, IsBranch;
-def JR : IndirectBranch<CPURegs>;
-def B : UncondBranch<0x04, "b">;
-def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
-def BNE : CBranch<0x05, "bne", setne, CPURegs>;
-def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
-def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
-def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
-def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
-
-let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1,
- hasDelaySlot = 1, Defs = [RA] in
-def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>;
-
-def JAL : JumpLink<0x03, "jal">;
-def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
-def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
-def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
-def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall;
-def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall;
-
-def RET : RetBase<CPURegs>;
+def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
+ Requires<[RelocStatic, HasStdEnc]>, IsBranch;
+def JR : IndirectBranch<CPURegs>, MTLO_FM<8>;
+def B : UncondBranch<"b">, B_FM;
+def BEQ : CBranch<"beq", seteq, CPURegs>, BEQ_FM<4>;
+def BNE : CBranch<"bne", setne, CPURegs>, BEQ_FM<5>;
+def BGEZ : CBranchZero<"bgez", setge, CPURegs>, BGEZ_FM<1, 1>;
+def BGTZ : CBranchZero<"bgtz", setgt, CPURegs>, BGEZ_FM<7, 0>;
+def BLEZ : CBranchZero<"blez", setle, CPURegs>, BGEZ_FM<6, 0>;
+def BLTZ : CBranchZero<"bltz", setlt, CPURegs>, BGEZ_FM<1, 0>;
+
+def BAL_BR: BAL_FT, BAL_FM;
+
+def JAL : JumpLink<"jal">, FJ<3>;
+def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM;
+def JALRPseudo : JumpLinkRegPseudo<CPURegs, JALR, RA>;
+def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>;
+def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>;
+def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
+def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+
+def RET : RetBase<CPURegs>, MTLO_FM<8>;
+
+// Exception handling related node and instructions.
+// The conversion sequence is:
+// ISD::EH_RETURN -> MipsISD::EH_RETURN ->
+// MIPSeh_return -> (stack change + indirect branch)
+//
+// MIPSeh_return takes the place of the regular return instruction
+// but takes two arguments (V1, V0), which hold the stack offset and
+// the return address respectively.
+def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
-/// Multiply and Divide Instructions.
-def MULT : Mult32<0x18, "mult", IIImul>;
-def MULTu : Mult32<0x19, "multu", IIImul>;
-def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>;
-def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst),
+ [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>;
+ def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff,
+ CPU64Regs:$dst),
+ [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>;
+}
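+// As a rough sketch: a call to @llvm.eh.return with arguments
+// (%offset, %handler) becomes (MIPSehret %offset, %handler), which selects
+// to one of the pseudos above and is finally expanded into a stack
+// adjustment by %offset followed by an indirect branch to %handler.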
-def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>;
-def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>;
-def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>;
-def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>;
+/// Multiply and Divide Instructions.
+def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
+def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
+def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>;
+def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv,
+ 0>;
+
+def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
+def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
+def MFHI : MoveFromLOHI<"mfhi", CPURegs, [HI]>, MFLO_FM<0x10>;
+def MFLO : MoveFromLOHI<"mflo", CPURegs, [LO]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>;
-def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>;
+def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>;
+def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<0x20, "clz", CPURegs>;
-def CLO : CountLeading1<0x21, "clo", CPURegs>;
+def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>;
+def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>;
+def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>;
-/// No operation
-let addr=0 in
- def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+/// No operation.
+def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
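+// The expansion "sll $zero, $zero, 0" encodes as the all-zero word, which
+// is the canonical MIPS nop encoding.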
// FrameIndexes are legalized when they are operands from load/store
// instructions. The same does not happen for stack address copies, so an
// add op with a mem ComplexPattern is used so that the stack address copy
// can be matched. It is similar to Sparc's LEA_ADDRi.
-def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
-
-// DynAlloc node points to dynamically allocated stack space.
-// $sp is added to the list of implicitly used registers to prevent dead code
-// elimination from removing instructions that modify $sp.
-let Uses = [SP] in
-def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
+def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
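+// For example, a stack address copy can then be selected as something like
+// "addiu $v0, $sp, 16" (illustrative operands; the actual ones are supplied
+// by the mem_ea ComplexPattern).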
// MADD*/MSUB*
-def MADD : MArithR<0, "madd", MipsMAdd, 1>;
-def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
-def MSUB : MArithR<4, "msub", MipsMSub>;
-def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
-// MUL is a assembly macro in the current used ISAs. In recent ISA's
-// it is a real instruction.
-def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
- Requires<[HasMips32, HasStandardEncoding]>;
+def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
-def RDHWR : ReadHardware<CPURegs, HWRegs>;
+def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>;
+def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>;
-def EXT : ExtBase<0, "ext", CPURegs>;
-def INS : InsBase<4, "ins", CPURegs>;
+/// Move Control Registers From/To CPU Registers
+def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
+
+def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
+
+def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
+
+def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>;
-def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>;
-def : InstAlias<"addu $rs,$rt,$imm",
- (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"add $rs,$rt,$imm",
- (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"and $rs,$rt,$imm",
- (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"j $rs", (JR CPURegs:$rs)>;
-def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>;
-def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"slt $rs,$rt,$imm",
- (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"xor $rs,$rt,$imm",
- (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"move $dst, $src",
+ (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>;
+def : InstAlias<"addu $rs, $rt, $imm",
+ (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"add $rs, $rt, $imm",
+ (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
+def : InstAlias<"neg $rt, $rs",
+ (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"negu $rt, $rs",
+ (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+def : InstAlias<"mfc0 $rt, $rd",
+ (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd",
+ (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+def : InstAlias<"mfc2 $rt, $rd",
+ (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd",
+ (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class LoadImm32<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")>;
+def LoadImm32Reg : LoadImm32<"li", shamt, CPURegsOpnd>;
+
+class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")>;
+def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>;
+
+class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")>;
+def LoadAddr32Imm : LoadAddressImm<"la", shamt, CPURegsOpnd>;
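+
+// With these pseudos the assembler accepts the usual macros, for example
+// "li $v0, 1" or "la $v0, 8($sp)" (illustrative operands), and expands them
+// into real instructions.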
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
+// Load/store pattern templates.
+class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
+ MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
+ MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
// Small immediates
def : MipsPat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
@@ -1194,25 +1192,25 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have a "not" instruction, so we expand it using NOR.
def : MipsPat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+ (NOR CPURegsOpnd:$in, ZERO)>;
// extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
}
// peepholes
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
}
@@ -1289,12 +1287,27 @@ defm : SetgtPats<CPURegs, SLT, SLTu>;
defm : SetgePats<CPURegs, SLT, SLTu>;
defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
-
// bswap pattern
def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+// mflo/hi patterns.
+def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
+
+// Load halfword/word patterns.
+let AddedComplexity = 40 in {
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu, i32, zextloadi8>;
+ def : LoadRegImmPat<LH, i32, sextloadi16>;
+ def : LoadRegImmPat<LW, i32, load>;
+ }
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
+ def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
+ def : LoadRegImmPat<LW_P8, i32, load>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index da1119df8f9f..1b2a325d3ce6 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -16,12 +16,12 @@
#include "MipsInstrInfo.h"
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 5d9f0cffb749..2efe534053a2 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -10,21 +10,21 @@
// This pass expands a branch or jump instruction into a long branch if its
// offset is too large to fit into its immediate field.
//
-// FIXME:
-// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
+// FIXME:
+// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
// 2. If program has inline assembly statements whose size cannot be
-// determined accurately, load branch target addresses from the GOT.
+// determined accurately, load branch target addresses from the GOT.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-long-branch"
#include "Mips.h"
-#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -258,7 +258,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BalTgtMBB->addSuccessor(TgtMBB);
int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
- int64_t Offset = TgtAddress - (I.Address + I.Size - 20);
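+ // A MIPS instruction is 4 bytes; BalTgtMBB is expected to contain exactly
+ // BalTgtMBBSize (5) instructions, as asserted further below.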
+ unsigned BalTgtMBBSize = 5;
+ int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4);
int64_t Lo = SignExtend64<16>(Offset & 0xffff);
int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
@@ -283,9 +284,10 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP).addImm(-8);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
.addReg(Mips::SP).addImm(0);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi)
- ->setIsInsideBundle();
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi));
Pos = BalTgtMBB->begin();
@@ -295,9 +297,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::RA).addReg(Mips::AT);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
.addReg(Mips::SP).addImm(0);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(8)->setIsInsideBundle();
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8));
} else {
// $longbr:
// daddiu $sp, $sp, -16
@@ -335,9 +339,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::AT_64).addImm(16);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
.addReg(Mips::AT_64).addImm(Hi);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle();
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16));
Pos = BalTgtMBB->begin();
@@ -347,10 +353,15 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::RA_64).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
.addReg(Mips::SP_64).addImm(0);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
- .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle();
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(16));
}
+
+ assert(BalTgtMBBSize == BalTgtMBB->size());
+ assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize);
} else {
// $longbr:
// j $tgt
@@ -359,8 +370,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
//
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
+
+ assert(LongBrMBB->size() == LongBranchSeqSize);
}
if (I.Br->isUnconditionalBranch()) {
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 4162f981d1df..d836975eb7d2 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
#include "MipsMCInstLower.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 5ff19aba0267..59b23f7ad7c1 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
#include "MipsSubtarget.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -56,4 +56,20 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() {
return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC);
}
+void MipsFunctionInfo::createEhDataRegsFI() {
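+ // Create one frame object per exception-handling data register (these map
+ // to $a0-$a3, or their 64-bit counterparts under N64; see ehDataReg()).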
+ for (int I = 0; I < 4; ++I) {
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+ const TargetRegisterClass *RC = ST.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ }
+}
+
+bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
+ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
+ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index bb45f92f18fd..b05b348037d9 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,8 +15,8 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "MipsSubtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
@@ -53,10 +53,16 @@ class MipsFunctionInfo : public MachineFunctionInfo {
/// Size of incoming argument area.
unsigned IncomingArgSize;
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0)
+ VarArgsFrameIndex(0), CallsEhReturn(false)
{}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
@@ -78,6 +84,14 @@ public:
}
unsigned getIncomingArgSize() const { return IncomingArgSize; }
+
+ bool callsEhReturn() const { return CallsEhReturn; }
+ void setCallsEhReturn() { CallsEhReturn = true; }
+
+ void createEhDataRegsFI();
+ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
+ bool isEhDataRegFI(int FI) const;
+
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index d8e0dd436a95..32507334e9c6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -17,25 +17,25 @@
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
@@ -47,6 +47,28 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+unsigned
+MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case Mips::CPURegsRegClassID:
+ case Mips::CPU64RegsRegClassID:
+ case Mips::DSPRegsRegClassID: {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
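+ // Heuristic limit: most of the 32 GPRs are allocatable; reserving a
+ // frame pointer removes one more register.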
+ return 28 - TFI->hasFP(MF);
+ }
+ case Mips::FGR32RegClassID:
+ return 32;
+ case Mips::AFGR64RegClassID:
+ return 16;
+ case Mips::FGR64RegClassID:
+ return 32;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Callee Saved Registers methods
//===----------------------------------------------------------------------===//
@@ -155,21 +177,14 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
// direct reference.
void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
errs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
uint64_t stackSize = MF.getFrameInfo()->getStackSize();
int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -177,7 +192,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
+ eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 78adf7f18bf2..5ed51241391f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,6 +42,8 @@ public:
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
@@ -53,9 +55,11 @@ public:
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -64,6 +68,9 @@ public:
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+ /// \brief Return GPR register class.
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 391c19e07e33..64458bcef7ef 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,54 +18,56 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
-class MipsReg<string n> : Register<n> {
- field bits<5> Num;
+class MipsReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "Mips";
}
-class MipsRegWithSubRegs<string n, list<Register> subregs>
+class MipsRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
: RegisterWithSubRegs<n, subregs> {
- field bits<5> Num;
+ let HWEncoding = Enc;
let Namespace = "Mips";
}
// Mips CPU Registers
-class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
-}
+class MipsGPRReg<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit CPU Registers
-class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class Mips64GPRReg<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_32];
}
// Mips 32-bit FPU Registers
-class FPR<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
-}
+class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class AFPR<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_fpeven, sub_fpodd];
let CoveredBySubRegs = 1;
}
-class AFPR64<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class AFPR64<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_32];
}
-// Mips Hardware Registers
-class HWR<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
+// Accumulator Registers
+class ACC<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
}
+// Mips Hardware Registers
+class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
+
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
@@ -228,7 +230,13 @@ let Namespace = "Mips" in {
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
+ def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
+ def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
def LO : Register<"lo">, DwarfRegNum<[65]>;
+ def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
+ def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
+ def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
def HI64 : RegisterWithSubRegs<"hi", [HI]>;
@@ -239,21 +247,22 @@ let Namespace = "Mips" in {
def FCR31 : Register<"31">;
// fcc0 register
- def FCC0 : Register<"fcc0">;
+ def FCC0 : MipsReg<0, "fcc0">;
// PC register
def PC : Register<"pc">;
// Hardware register $29
- def HWR29 : Register<"29">;
- def HWR29_64 : Register<"29">;
+ def HWR29 : MipsReg<29, "29">;
+ def HWR29_64 : MipsReg<29, "29">;
// Accum registers
- let SubRegIndices = [sub_lo, sub_hi] in
- def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>;
- def AC1 : Register<"ac1">;
- def AC2 : Register<"ac2">;
- def AC3 : Register<"ac3">;
+ def AC0 : ACC<0, "ac0", [LO, HI]>;
+ def AC1 : ACC<1, "ac1", [LO1, HI1]>;
+ def AC2 : ACC<2, "ac2", [LO2, HI2]>;
+ def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+
+ def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
def DSPCtrl : Register<"dspctrl">;
}
@@ -300,9 +309,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -328,15 +337,70 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+ let Size = 64;
+}
+
+def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+ let Size = 128;
+}
+
+def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+ let Size = 64;
+}
+
+def CPURegsAsmOperand : AsmOperandClass {
+ let Name = "CPURegsAsm";
+ let ParserMethod = "parseCPURegs";
+}
+
+def CPU64RegsAsmOperand : AsmOperandClass {
+ let Name = "CPU64RegsAsm";
+ let ParserMethod = "parseCPU64Regs";
+}
+
+def CCRAsmOperand : AsmOperandClass {
+ let Name = "CCRAsm";
+ let ParserMethod = "parseCCRRegs";
+}
+
+def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> {
+ let ParserMatchClass = CPURegsAsmOperand;
+}
+
+def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> {
+ let ParserMatchClass = CPU64RegsAsmOperand;
+}
+
+def CCROpnd : RegisterOperand<CCR, "printCPURegs"> {
+ let ParserMatchClass = CCRAsmOperand;
+}
+
+def HWRegsAsmOperand : AsmOperandClass {
+ let Name = "HWRegsAsm";
+ let ParserMethod = "parseHWRegs";
+}
+
+def HW64RegsAsmOperand : AsmOperandClass {
+ let Name = "HW64RegsAsm";
+ let ParserMethod = "parseHW64Regs";
+}
+
+def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HWRegsAsmOperand;
+}
+
+def HW64RegsOpnd : RegisterOperand<HWRegs64, "printCPURegs"> {
+ let ParserMatchClass = HW64RegsAsmOperand;
+}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 03f5176b2974..68ec92188802 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -12,26 +12,187 @@
//===----------------------------------------------------------------------===//
#include "MipsSEFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
-#include "MipsSEInstrInfo.h"
#include "MipsMachineFunction.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "MipsSEInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace {
+typedef MachineBasicBlock::iterator Iter;
+
+/// Helper class to expand accumulator pseudos.
+class ExpandACCPseudo {
+public:
+ ExpandACCPseudo(MachineFunction &MF);
+ bool expand();
+
+private:
+ bool expandInstr(MachineBasicBlock &MBB, Iter I);
+ void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+
+ MachineFunction &MF;
+ const MipsSEInstrInfo &TII;
+ const MipsRegisterInfo &RegInfo;
+ MachineRegisterInfo &MRI;
+};
+}
+
+ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ : MF(MF_),
+ TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
+ RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
+
+bool ExpandACCPseudo::expand() {
+ bool Expanded = false;
+
+ for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
+ BB != BBEnd; ++BB)
+ for (Iter I = BB->begin(), End = BB->end(); I != End;)
+ Expanded |= expandInstr(*BB, I++);
+
+ return Expanded;
+}
+
+bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+ switch(I->getOpcode()) {
+ case Mips::LOAD_AC64:
+ case Mips::LOAD_AC64_P8:
+ case Mips::LOAD_AC_DSP:
+ case Mips::LOAD_AC_DSP_P8:
+ expandLoad(MBB, I, 4);
+ break;
+ case Mips::LOAD_AC128:
+ case Mips::LOAD_AC128_P8:
+ expandLoad(MBB, I, 8);
+ break;
+ case Mips::STORE_AC64:
+ case Mips::STORE_AC64_P8:
+ case Mips::STORE_AC_DSP:
+ case Mips::STORE_AC_DSP_P8:
+ expandStore(MBB, I, 4);
+ break;
+ case Mips::STORE_AC128:
+ case Mips::STORE_AC128_P8:
+ expandStore(MBB, I, 8);
+ break;
+ case Mips::COPY_AC64:
+ case Mips::COPY_AC_DSP:
+ expandCopy(MBB, I, 4);
+ break;
+ case Mips::COPY_AC128:
+ expandCopy(MBB, I, 8);
+ break;
+ default:
+ return false;
+ }
+
+ MBB.erase(I);
+ return true;
+}
+
+void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // load $vr0, FI
+ // copy lo, $vr0
+ // load $vr1, FI + 4
+ // copy hi, $vr1
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+
+ TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill);
+ TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize);
+ BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
+}
+
+void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, lo
+ // store $vr0, FI
+ // copy $vr1, hi
+ // store $vr1, FI + 4
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
+ unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
+}
+
+void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, src_lo
+ // copy dst_lo, $vr0
+ // copy $vr1, src_hi
+ // copy dst_hi, $vr1
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
+ unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
+ .addReg(VR0, RegState::Kill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
+ .addReg(VR1, RegState::Kill);
+}
+
+unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
+ static const unsigned EhDataReg[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned EhDataReg64[] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
+ };
+
+ return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsRegisterInfo *RegInfo =
static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
@@ -105,6 +266,30 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Insert instructions that spill eh data registers.
+ for (int I = 0; I < 4; ++I) {
+ if (!MBB.isLiveIn(ehDataReg(I)))
+ MBB.addLiveIn(ehDataReg(I));
+ TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false,
+ MipsFI->getEhDataRegFI(I), RC, RegInfo);
+ }
+
+ // Emit .cfi_offset directives for eh data registers.
+ MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2);
+ for (int I = 0; I < 4; ++I) {
+ int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(ehDataReg(I));
+ Moves.push_back(MachineMove(CSLabel2, DstML, SrcML));
+ }
+ }
+
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
@@ -124,6 +309,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
@@ -144,6 +332,22 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Find first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instructions that restore eh data registers.
+ for (int J = 0; J < 4; ++J) {
+ TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J),
+ RC, RegInfo);
+ }
+ }
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI->getStackSize();
@@ -191,19 +395,59 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// Reserve call frame if the size of the maximum call frame fits into 16-bit
// immediate field and there are no variable sized objects on the stack.
- return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+ // Make sure the second register scavenger spill slot can be accessed with one
+ // instruction.
+ return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
+ !MFI->hasVarSizedObjects();
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsSEFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ TII.adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
}
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
MRI.setPhysRegUsed(FP);
+ // Create spill slots for eh data registers if function calls eh_return.
+ if (MipsFI->callsEhReturn())
+ MipsFI->createEhDataRegsFI();
+
+ // Expand pseudo instructions which load, store or copy accumulators.
+ // Add an emergency spill slot if a pseudo was expanded.
+ if (ExpandACCPseudo(MF).expand()) {
+ // The spill slot should be half the size of the accumulator. If the target
+ // is mips64, it should be 64-bit; otherwise, it should be 32-bit.
+ const TargetRegisterClass *RC = STI.hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+ }
+
// Set scavenging frame index if necessary.
uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
estimateStackSize(MF);
@@ -215,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
&Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
- RS->setScavengingFrameIndex(FI);
+ RS->addScavengingFrameIndex(FI);
}
const MipsFrameLowering *
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 6481a0ac86d7..193a66cc65a7 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,13 +21,17 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -37,6 +41,7 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
+ unsigned ehDataReg(unsigned I) const;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 000000000000..d6d220750c61
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,473 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // The GNU linker requires that the first two instructions appear at the
+ // beginning of a function and that no instructions be inserted before or
+ // between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to
+ // ensure that the value defined by instruction 1 (addiu) is valid when
+ // instruction 2 (addu) reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
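+// Rough illustration of the expansion above for (adde $lhs, $rhs, flag),
+// with arbitrary register names (sube uses subu for the final operation):
+//   sltu $t0, $cmp_lhs, $cmp_rhs  // rematerialize the carry bit
+//   addu $t1, $t0, $rhs           // fold the carry into one operand
+//   addu $res, $lhs, $t1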
+
+/// ComplexPattern used on MipsInstrInfo to select the reg+imm addressing
+/// mode of Mips load/store instructions.
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // If Address is a FrameIndex, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // In PIC code, load the global address (GA).
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. For example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction selection not handled by the auto-generated
+ // tablegen matcher should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+
+ case MipsISD::InsertLOHI: {
+ unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
+ Mips::ACRegsRegClassID;
+ SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
+ SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
+ SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
+ const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
+ Node->getOperand(1), HiIdx };
+ SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops, 5);
+ return std::make_pair(true, Res);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
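
As a rough illustration of the SMUL_LOHI/UMUL_LOHI handling in selectNode
above, here is a minimal C++ sketch of source that is assumed to legalize
into such a node on mips32; it is illustrative only and not part of the
patch:

    #include <cstdint>

    int64_t widen_mul(int32_t a, int32_t b) {
      // Both halves of the 64-bit product are used, so the selector can
      // emit one MULT and read LO and HI instead of multiplying twice.
      return static_cast<int64_t>(a) * b;
    }
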
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 000000000000..6137ab040bbc
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
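
For context, a hedged sketch of how the base class is assumed to drive the
selectNode hook declared above (the base-class body is outside this patch):

    SDNode *MipsDAGToDAGISel::Select(SDNode *Node) {
      // Give the subtarget-specific selector (MipsSE or Mips16) first try.
      std::pair<bool, SDNode *> Ret = selectNode(Node);
      if (Ret.first)
        return Ret.second;
      // Otherwise fall back to the tablegen-generated matcher.
      return SelectCode(Node);
    }
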
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 000000000000..4f219218d31f
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,442 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+    // When dealing with single precision only, f64 operations use libcalls;
+    // otherwise add an f64 register class.
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+
+ if (HasMips64)
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch(Op.getOpcode()) {
+ case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
+ case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
+ case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
+ case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
+ case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
+ case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
+ case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+ }
+
+ return MipsTargetLowering::LowerOperation(Op, DAG);
+}
+
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = ADDENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//  (subc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+  // SUBENode's second operand must be a flag output of a SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = SUBENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+  // create MipsMSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue
+MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
+ default:
+ return MipsTargetLowering::PerformDAGCombine(N, DCI);
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+  // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
+ bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const {
+ EVT Ty = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Lo, Hi;
+
+ if (HasLo)
+ Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ if (HasHi)
+ Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+
+ if (!HasLo || !HasHi)
+ return HasLo ? Lo : Hi;
+
+ SDValue Vals[] = { Lo, Hi };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+MachineBasicBlock *MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
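
As an illustration of the ADDE/SUBE combines above, a minimal C++ sketch of
code that is assumed to produce the matched patterns on mips32 (the function
names are invented):

    #include <cstdint>

    int64_t mul_acc(int64_t acc, int32_t a, int32_t b) {
      // acc + a widened product legalizes to SMUL_LOHI feeding ADDC/ADDE,
      // which selectMADD can fuse into a single MipsISD::MAdd.
      return acc + static_cast<int64_t>(a) * b;
    }

    int64_t mul_dec(int64_t acc, int32_t a, int32_t b) {
      // The mirrored SUBC/SUBE chain is the selectMSUB pattern.
      return acc - static_cast<int64_t>(a) * b;
    }
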
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 000000000000..186f6a343dee
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,62 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
+ &Mips::ACRegsRegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+ }
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index fb0f9df038c3..ca0315ed9f6e 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "MipsSEInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -90,7 +90,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
else if (Mips::CCRRegClass.contains(SrcReg))
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
@@ -120,7 +120,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::MOVCCRToCCR;
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
else if (SrcReg == Mips::HI64)
Opc = Mips::MFHI64, SrcReg = 0;
else if (SrcReg == Mips::LO64)
@@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
+ else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC64;
+ else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC_DSP;
+ else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC128;
assert(Opc && "Cannot copy registers");
@@ -144,18 +150,18 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
if (SrcReg)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
}
void MipsSEInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void MipsSEInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
@@ -220,6 +236,10 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::ExtractElementF64:
ExpandExtractElementF64(MBB, MI);
break;
+ case Mips::MIPSeh_return32:
+ case Mips::MIPSeh_return64:
+ ExpandEhReturn(MBB, MI);
+ break;
}
MBB.erase(MI);
@@ -356,6 +376,35 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
.addReg(HiReg);
}
+void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // This pseudo instruction is generated as part of the lowering of
+  // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg and an
+  // indirect jump to TargetReg.
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR;
+ unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned OffsetReg = I->getOperand(0).getReg();
+ unsigned TargetReg = I->getOperand(1).getReg();
+
+ // or $ra, $v0, $zero
+ // addu $sp, $sp, $v1
+ // jr $ra
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), T9)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
+ .addReg(SP).addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+}
+
const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
return new MipsSEInstrInfo(TM);
}
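
A minimal sketch of what is assumed to reach the MIPSeh_return32/64 pseudos
expanded above, using the GCC-style builtin; the surrounding C++ code is
illustrative only:

    // Adjusts the stack by `offset` and resumes at `handler`; this lowers
    // to ISD::EH_RETURN and then to the MIPSeh_return pseudo expanded in
    // ExpandEhReturn above.
    void unwind_stub(long offset, void *handler) {
      __builtin_eh_return(offset, handler);
    }
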
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 55b78b2cfb97..0bf7876f0fe0 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -49,17 +49,19 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
@@ -85,6 +87,8 @@ private:
MachineBasicBlock::iterator I) const;
void ExpandBuildPairF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ void ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
}
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 56b9ba95e5de..96967380b29d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -15,28 +15,28 @@
#include "MipsSERegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
-#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -54,26 +54,13 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsSERegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
- unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+const TargetRegisterClass *
+MipsSERegisterInfo::intRegClass(unsigned Size) const {
+ if (Size == 4)
+ return &Mips::CPURegsRegClass;
- II->adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
+ assert(Size == 8);
+ return &Mips::CPU64RegsRegClass;
}
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
@@ -83,6 +70,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -93,15 +81,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
+ bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
+
// The following stack frame objects are always referenced relative to $sp:
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
+ // 4. Locations for eh data registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
- if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 7437bd36c333..2f7c37bb460d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -31,9 +31,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 930af4dda159..e11e5d142b74 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -26,13 +26,14 @@ void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model RM) :
+ Reloc::Model _RM) :
MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
- IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false),
- HasDSP(false), HasDSPR2(false), IsAndroid(false)
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
+ HasBitCount(false), HasFPIdx(false),
+ InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ RM(_RM)
{
std::string CPUName = CPU;
if (CPUName.empty())
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ff69237ec2bd..7a2e47ce5a9d 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,8 +14,9 @@
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "MCTargetDesc/MipsReginfo.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -76,30 +77,32 @@ protected:
// HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
bool HasCondMov;
- // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
- // instructions.
- bool HasMulDivAdd;
-
- // HasMinMax - MIN and MAX instructions.
- bool HasMinMax;
-
// HasSwap - Byte and half swap instructions.
bool HasSwap;
// HasBitCount - Count leading '1' and '0' bits.
bool HasBitCount;
+ // HasFPIdx -- Floating point indexed load/store instructions.
+ bool HasFPIdx;
+
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+ // InMicroMips -- can process MicroMips instructions
+ bool InMicroMipsMode;
+
// HasDSP, HasDSPR2 -- supports DSP ASE.
bool HasDSP, HasDSPR2;
- // IsAndroid -- target is android
- bool IsAndroid;
-
InstrItineraryData InstrItins;
+  // The instance of the register info section object.
+ MipsReginfo MRI;
+
+ // Relocation Model
+ Reloc::Model RM;
+
public:
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -127,8 +130,6 @@ public:
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
- bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
-
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
@@ -137,9 +138,9 @@ public:
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
+ bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
- bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
@@ -148,10 +149,15 @@ public:
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; }
bool hasCondMov() const { return HasCondMov; }
- bool hasMulDivAdd() const { return HasMulDivAdd; }
- bool hasMinMax() const { return HasMinMax; }
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
+ bool hasFPIdx() const { return HasFPIdx; }
+
+  // Grab the MipsReginfo object.
+  const MipsReginfo &getMReginfo() const { return MRI; }
+
+  // Grab the relocation model.
+  Reloc::Model getRelocationModel() const { return RM; }
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 983ee219412b..33363580aba7 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,8 +15,8 @@
#include "Mips.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -45,15 +45,16 @@ MipsTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS, isLittle, RM),
DL(isLittle ?
(Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") :
(Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo(),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
@@ -115,6 +116,8 @@ bool MipsPassConfig::addPreEmitPass() {
// NOTE: long branch has not been implemented for mips16.
if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
addPass(createMipsLongBranchPass(TM));
+ if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ addPass(createMipsConstantIslandPass(TM));
return true;
}
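
An editorial annotation of the new N64 little-endian layout string from the
hunk above; the breakdown follows standard DataLayout string syntax, and the
constant name is invented:

    static const char *const N64LayoutLE =
        "e"                   // little-endian
        "-p:64:64:64"         // 64-bit pointers, 64-bit ABI/preferred align
        "-i8:8:32-i16:16:32"  // i8/i16: ABI align 8/16, preferred 32
        "-i64:64:64"          // i64 aligned to 64 bits
        "-f128:128:128"       // f128 aligned to 128 bits
        "-n32:64"             // native integer widths: 32 and 64
        "-S128";              // natural stack alignment: 128 bits
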
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index b54f5cee6d4d..7e5f19226433 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -15,15 +15,15 @@
#define MIPSTARGETMACHINE_H
#include "MipsFrameLowering.h"
-#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class formatted_raw_ostream;
@@ -32,13 +32,11 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
const DataLayout DL; // Calculates type size & alignment
- const MipsInstrInfo *InstrInfo;
- const MipsFrameLowering *FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsInstrInfo> InstrInfo;
+ OwningPtr<const MipsFrameLowering> FrameLowering;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -47,12 +45,12 @@ public:
CodeGenOpt::Level OL,
bool isLittle);
- virtual ~MipsTargetMachine() { delete InstrInfo; }
+ virtual ~MipsTargetMachine() {}
virtual const MipsInstrInfo *getInstrInfo() const
- { return InstrInfo; }
+ { return InstrInfo.get(); }
virtual const TargetFrameLowering *getFrameLowering() const
- { return FrameLowering; }
+ { return FrameLowering.get(); }
virtual const MipsSubtarget *getSubtargetImpl() const
{ return &Subtarget; }
virtual const DataLayout *getDataLayout() const
@@ -65,20 +63,13 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 881908b82c91..4c748c5b57cd 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,14 +9,14 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
static cl::opt<unsigned>
@@ -38,6 +38,20 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
+ // Register info information
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isABI_N64() || Subtarget.isABI_N32())
+ ReginfoSection =
+ getContext().getELFSection(".MIPS.options",
+ ELF::SHT_MIPS_OPTIONS,
+ ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP,
+ SectionKind::getMetadata());
+ else
+ ReginfoSection =
+ getContext().getELFSection(".reginfo",
+ ELF::SHT_MIPS_REGINFO,
+ ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
}
// A address must be loaded from a small section if its size is less than the
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index c394a9dc02e4..c0e9140c829c 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -17,6 +17,7 @@ namespace llvm {
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
+ const MCSection *ReginfoSection;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -35,6 +36,7 @@ namespace llvm {
const TargetMachine &TM) const;
// TODO: Classify globals as mips wishes.
+ const MCSection *getReginfoSection() const { return ReginfoSection; }
};
} // end namespace llvm
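
A hypothetical consumer of the getReginfoSection() accessor added above; the
helper, its caller, and the cast path are assumptions, not part of the patch:

    #include "MipsTargetObjectFile.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Target/TargetMachine.h"

    static void switchToReginfo(const llvm::TargetMachine &TM,
                                llvm::MCStreamer &OutStreamer) {
      const llvm::MipsTargetObjectFile &TLOF =
          static_cast<const llvm::MipsTargetObjectFile &>(
              TM.getTargetLowering()->getObjFileLowering());
      // .reginfo on O32, .MIPS.options on N32/N64, per Initialize() above.
      OutStreamer.SwitchSection(TLOF.getReginfoSection());
    }
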
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 243632b20aac..3615c146a527 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7cb16b4dd810..7da2fed4cd57 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,7 +22,7 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
- VectorElementize.cpp
+ NVVMReflect.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 454583850b71..b3e8b5d2622d 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
};
const unsigned AnnotationNameLen = 8; // length of each annotation name
-const char
-PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
- "maxntidx", // PROPERTY_MAXNTID_X
- "maxntidy", // PROPERTY_MAXNTID_Y
- "maxntidz", // PROPERTY_MAXNTID_Z
- "reqntidx", // PROPERTY_REQNTID_X
- "reqntidy", // PROPERTY_REQNTID_Y
- "reqntidz", // PROPERTY_REQNTID_Z
- "minctasm", // PROPERTY_MINNCTAPERSM
- "texture", // PROPERTY_ISTEXTURE
- "surface", // PROPERTY_ISSURFACE
- "sampler", // PROPERTY_ISSAMPLER
- "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
- "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
- "kernel", // PROPERTY_ISKERNEL_FUNCTION
- "align", // PROPERTY_ALIGN
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+ "maxntidx", // PROPERTY_MAXNTID_X
+ "maxntidy", // PROPERTY_MAXNTID_Y
+ "maxntidz", // PROPERTY_MAXNTID_Z
+ "reqntidx", // PROPERTY_REQNTID_X
+ "reqntidy", // PROPERTY_REQNTID_Y
+ "reqntidz", // PROPERTY_REQNTID_Z
+ "minctasm", // PROPERTY_MINNCTAPERSM
+ "texture", // PROPERTY_ISTEXTURE
+ "surface", // PROPERTY_ISSURFACE
+ "sampler", // PROPERTY_ISSAMPLER
+ "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+ "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "kernel", // PROPERTY_ISKERNEL_FUNCTION
+ "align", // PROPERTY_ALIGN
- // last property
- "proplast", // PROPERTY_LAST
+ // last property
+ "proplast", // PROPERTY_LAST
};
// name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
// compiling those .cpp files, hence __attribute__((unused)).
__attribute__((unused))
#endif
-static const char* NamedMDForAnnotations = "nvvm.annotations";
+ static const char *NamedMDForAnnotations = "nvvm.annotations";
}
-
#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 1d4166575da5..459cd96cb0cd 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -23,15 +23,15 @@ bool CompileForDebugging;
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
- cl::location(CompileForDebugging),
- cl::init(false));
+ cl::location(CompileForDebugging), cl::init(false));
-void NVPTXMCAsmInfo::anchor() { }
+void NVPTXMCAsmInfo::anchor() {}
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::nvptx64)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::nvptx64) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
CommentString = "//";
@@ -54,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Data32bitsDirective = " .b32 ";
Data64bitsDirective = " .b64 ";
PrivateGlobalPrefix = "";
- ZeroDirective = " .b8";
+ ZeroDirective = " .b8";
AsciiDirective = " .b8";
AscizDirective = " .b8";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 44aa01ca6e30..ccd29705df72 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -28,7 +28,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
-
using namespace llvm;
static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+ StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
-
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
// Register the MC asm info.
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index b5684883fc95..d6c79b5110cc 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_SUPPORT_MANAGED_STRING_H
#define LLVM_SUPPORT_MANAGED_STRING_H
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index a8d082a4d8b0..6a53a443bfb6 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -15,11 +15,11 @@
#ifndef LLVM_TARGET_NVPTX_H
#define LLVM_TARGET_NVPTX_H
-#include "llvm/Value.h"
-#include "llvm/Module.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include <cassert>
#include <iosfwd>
@@ -41,19 +41,24 @@ enum CondCodes {
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
switch (CC) {
- case NVPTXCC::NE: return "ne";
- case NVPTXCC::EQ: return "eq";
- case NVPTXCC::LT: return "lt";
- case NVPTXCC::LE: return "le";
- case NVPTXCC::GT: return "gt";
- case NVPTXCC::GE: return "ge";
+ case NVPTXCC::NE:
+ return "ne";
+ case NVPTXCC::EQ:
+ return "eq";
+ case NVPTXCC::LT:
+ return "lt";
+ case NVPTXCC::LE:
+ return "le";
+ case NVPTXCC::GT:
+ return "gt";
+ case NVPTXCC::GE:
+ return "ge";
}
llvm_unreachable("Unknown condition code");
}
-FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -63,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
-namespace NVPTX
-{
+namespace NVPTX {
enum DrvInterface {
NVCL,
CUDA,
@@ -103,7 +107,7 @@ enum LoadStore {
};
namespace PTXLdStInstCode {
-enum AddressSpace{
+enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
CONSTANT = 2,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 7aee3595c625..d78b4e81a3e5 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td"
//===----------------------------------------------------------------------===//
// SM Versions
-def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10",
- "Target SM 1.0">;
-def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11",
- "Target SM 1.1">;
-def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12",
- "Target SM 1.2">;
-def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13",
- "Target SM 1.3">;
def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
"Target SM 2.0">;
def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
@@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"sm_10", [SM10]>;
-def : Proc<"sm_11", [SM11]>;
-def : Proc<"sm_12", [SM12]>;
-def : Proc<"sm_13", [SM13]>;
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 668c39308f71..0f792ec6826e 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
#include "NVPTXAllocaHoisting.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
- bool functionModified = false;
- Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
}
char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
- "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+ "blocks to the entry block");
-FunctionPass *createAllocaHoisting() {
- return new NVPTXAllocaHoisting();
-}
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c7cabf695311..19d73c5783cb 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,8 +15,8 @@
#define NVPTX_ALLOCA_HOISTING_H_
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0a885ce1c4a6..ce5d78afa332 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -13,40 +13,40 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAsmPrinter.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
-#include "NVPTXTargetMachine.h"
+#include "NVPTXNumRegisters.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXMCAsmInfo.h"
-#include "NVPTXNumRegisters.h"
+#include "cl_common_defines.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Support/TimeValue.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Path.h"
-#include "llvm/Assembly/Writer.h"
-#include "cl_common_defines.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <sstream>
using namespace llvm;
-
#include "NVPTXGenAsmWriter.inc"
bool RegAllocNilUsed = true;
@@ -58,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers",
cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
cl::init(true));
-namespace llvm {
-bool InterleaveSrcInPtx = false;
-}
-
-static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Emit source line in ptx file"),
- cl::location(llvm::InterleaveSrcInPtx));
+namespace llvm { bool InterleaveSrcInPtx = false; }
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
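The reflowed option uses the external-storage form of cl::opt: the second template parameter (true) plus cl::location make the parser write through to the plain bool declared just above, so any file can test the flag without seeing the option object. A minimal sketch of the consuming side, mirroring the call site in emitLineNumberAsDotLoc later in this file:

    if (llvm::InterleaveSrcInPtx)                 // set by -nvptx-emit-src
      this->emitSrcInText(fileName.str(), curLoc.getLine());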
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V,
- DenseSet<GlobalVariable*> &Globals) {
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
@@ -87,12 +83,12 @@ void DiscoverDependentGlobals(Value *V,
/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
/// instances to be emitted, but only after any dependents have been added
/// first.
-void VisitGlobalVariableForEmission(GlobalVariable *GV,
- SmallVectorImpl<GlobalVariable*> &Order,
- DenseSet<GlobalVariable*> &Visited,
- DenseSet<GlobalVariable*> &Visiting) {
+void VisitGlobalVariableForEmission(
+ GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+ DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
// Have we already visited this one?
- if (Visited.count(GV)) return;
+ if (Visited.count(GV))
+ return;
// Do we have a circular dependency?
if (Visiting.count(GV))
@@ -102,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV,
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable*> Others;
+ DenseSet<GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
-
- for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
- E = Others.end(); I != E; ++I)
+
+ for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
+ I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
// Now we can visit ourself
@@ -141,43 +138,35 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (CE == 0)
llvm_unreachable("Unknown constant value to lower!");
-
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
return LowerConstant(C, AP);
// Otherwise report the problem to the user.
{
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- WriteAsOperand(OS, CE, /*PrintType=*/false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/ false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- const Constant *PtrVal = CE->getOperand(0);
- SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (Offset == 0)
+ if (!OffsetAI)
return Base;
- // Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
- Offset = (Offset << SExtAmount) >> SExtAmount;
- }
-
+ int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
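The GetElementPtr case now delegates offset folding to GEPOperator. A standalone restatement of the new idiom, assuming CE is a constant GEP and TD the DataLayout as above:

    // accumulateConstantOffset() folds every index into a pointer-width APInt,
    // so the old manual truncate/sign-extend to pointer width is unnecessary.
    // It returns false only when an index is not a ConstantInt, which is not
    // expected for the constant GEPs reaching this path.
    APInt Offset(TD.getPointerSizeInBits(), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, Offset);
    int64_t ByteOff = Offset.getSExtValue(); // byte offset from the base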
@@ -187,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ // FALL THROUGH.
case Instruction::BitCast:
return LowerConstant(CE->getOperand(0), AP);
@@ -197,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false/*ZExt*/);
+ false /*ZExt*/);
return LowerConstant(Op, AP);
}
@@ -219,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ const MCExpr *MaskExpr =
+ MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
}
- // The MC library also has a right-shift operator, but it isn't consistently
+ // The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
case Instruction::Add:
case Instruction::Sub:
@@ -237,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ default:
+ llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add:
+ return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub:
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul:
+ return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv:
+ return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem:
+ return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl:
+ return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And:
+ return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or:
+ return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+ case Instruction::Xor:
+ return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
}
}
}
}
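For orientation, the MC-layer API this switch bottoms out in, used standalone (Ctx is an MCContext as above; the symbol name is illustrative):

    // Builds the relocatable expression "foo+4", as the Add case above would
    // produce for a constant offset of 4 from a global @foo.
    MCSymbol *Foo = Ctx.GetOrCreateSymbol(StringRef("foo"));
    const MCExpr *Base = MCSymbolRefExpr::Create(Foo, Ctx);
    const MCExpr *Off = MCConstantExpr::Create(4, Ctx);
    const MCExpr *Sum = MCBinaryExpr::CreateAdd(Base, Off, Ctx);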
-
-void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
-{
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
if (ignoreLoc(MI))
@@ -273,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (curLoc.isUnknown())
return;
-
const MachineFunction *MF = MI.getParent()->getParent();
//const TargetMachine &TM = MF->getTarget();
@@ -294,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (filenameMap.find(fileName.str()) == filenameMap.end())
return;
-
// Emit the line from the source file.
if (llvm::InterleaveSrcInPtx)
this->emitSrcInText(fileName.str(), curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()]
- << " " << curLoc.getLine() << " " << curLoc.getCol();
+ temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ << " " << curLoc.getCol();
OutStreamer.EmitRawText(Twine(temp.str().c_str()));
}
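A worked example of the directive assembled above: assuming filenameMap["kernel.cu"] == 1 and a DebugLoc at line 42, column 7, the streamer receives the raw text

    \t.loc 1 42 7

i.e. ".loc <file-index> <line> <col>", where the index is the one assigned to that filename in recordAndEmitFilenames() later in this file.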
@@ -314,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(OS.str());
}
-void NVPTXAsmPrinter::printReturnValStr(const Function *F,
- raw_ostream &O)
-{
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
const TargetLowering *TLI = TM.getTargetLowering();
@@ -334,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
+ if (size < 32)
+ size = 32;
} else {
- assert(Ty->isFloatingPointTy() &&
- "Floating point type expected here");
+ assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " func_retval0";
- }
- else if (isa<PointerType>(Ty)) {
+ } else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
- << " func_retval0";
+ << " func_retval0";
} else {
- if ((Ty->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(Ty)) {
+ if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align "
- << retAlignment
- << " .b8 func_retval0["
- << totalsz << "]";
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
} else
- assert(false &&
- "Unknown return type");
+ assert(false && "Unknown return type");
}
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -388,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " func_retval" << idx;
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -416,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
- emitLinkageDirective(F,O);
+ emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else {
@@ -439,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
unsigned numRegClasses = TRI.getNumRegClasses();
- VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
OutStreamer.EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
@@ -451,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
OutStreamer.EmitRawText(StringRef("}\n"));
- delete []VRidGlobal2LocalMap;
+ delete[] VRidGlobal2LocalMap;
}
-
-void
-NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const {
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+ raw_ostream &O) const {
  // If the NVVM IR has any of reqntid* specified, then output

// the reqntid directive, and set the unspecified ones to 1.
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
- else specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
- else specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
- else specified = true;
+ if (llvm::getReqNTIDx(F, reqntidx) == false)
+ reqntidx = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false)
+ reqntidy = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false)
+ reqntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".reqntid " << reqntidx << ", "
- << reqntidy << ", " << reqntidz << "\n";
+ O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ << "\n";
  // If the NVVM IR has any of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
- else specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
- else specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
- else specified = true;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ maxntidx = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ maxntidy = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ maxntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".maxntid " << maxntidx << ", "
- << maxntidy << ", " << maxntidz << "\n";
+ O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ << "\n";
unsigned mincta;
if (llvm::getMinCTASm(F, mincta))
O << ".minnctapersm " << mincta << "\n";
}
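A short driver for the defaulting rule above, reusing the same stream idiom (the values are illustrative):

    // Only reqntidx = 256 is annotated; y and z default to 1 and 'specified'
    // is still true, so exactly one directive line is produced:
    unsigned x = 256, y = 1, z = 1;
    O << ".reqntid " << x << ", " << y << ", " << z << "\n";
    // -> ".reqntid 256, 1, 1". With no reqntid* annotation at all, nothing is
    // emitted; maxntid* follows the same pattern.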
-void
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
- raw_ostream &O) {
- const TargetRegisterClass * RC = MRI->getRegClass(vr);
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
unsigned id = RC->getID();
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
@@ -508,61 +509,41 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
- // Vector virtual register
- if (getNVPTXVectorSize(RC) == 4)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
- << "}";
- else if (getNVPTXVectorSize(RC) == 2)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
- << "}";
- else
- llvm_unreachable("Unsupported vector size");
+ report_fatal_error("Bad register!");
}
-void
-NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
getVirtualRegisterName(vr, isVec, O);
}
-void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier,
- raw_ostream &O) {
- static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
- int Imm = (int)MO.getImm();
- if(0 == strcmp(Modifier, "vecelem"))
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+ const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+ static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+ int Imm = (int) MO.getImm();
+ if (0 == strcmp(Modifier, "vecelem"))
O << "_" << vecelem[Imm];
- else if(0 == strcmp(Modifier, "vecv4comm1")) {
- if((Imm < 0) || (Imm > 3))
+ else if (0 == strcmp(Modifier, "vecv4comm1")) {
+ if ((Imm < 0) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4comm2")) {
- if((Imm < 4) || (Imm > 7))
+ } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+ if ((Imm < 4) || (Imm > 7))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%4];
- }
- else if(0 == strcmp(Modifier, "vecv2comm1")) {
- if((Imm < 0) || (Imm > 1))
+ } else if (0 == strcmp(Modifier, "vecv4pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 4];
+ } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+ if ((Imm < 0) || (Imm > 1))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2comm2")) {
- if((Imm < 2) || (Imm > 3))
+ } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+ if ((Imm < 2) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%2];
- }
- else
+ } else if (0 == strcmp(Modifier, "vecv2pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 2];
+ } else
llvm_unreachable("Unknown Modifier on immediate operand");
}
@@ -584,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
emitVirtualRegister(MO.getReg(), true, O);
else
llvm_unreachable(
- "Don't know how to handle the modifier on virtual register.");
+ "Don't know how to handle the modifier on virtual register.");
}
}
return;
@@ -595,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
else if (strstr(Modifier, "vec") == Modifier)
printVecModifiedImmediate(MO, Modifier, O);
else
- llvm_unreachable("Don't know how to handle modifier on immediate operand");
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
return;
case MachineOperand::MO_FPImmediate:
@@ -607,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_ExternalSymbol: {
- const char * symbname = MO.getSymbolName();
+ const char *symbname = MO.getSymbolName();
if (strstr(symbname, ".PARAM") == symbname) {
unsigned index;
- sscanf(symbname+6, "%u[];", &index);
+ sscanf(symbname + 6, "%u[];", &index);
printParamName(index, O);
- }
- else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
unsigned index;
- sscanf(symbname+9, "%u[];", &index);
+ sscanf(symbname + 9, "%u[];", &index);
O << *CurrentFnSym << "_param_" << index << "_offset";
- }
- else
+ } else
O << symbname;
break;
}
@@ -632,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
}
-void NVPTXAsmPrinter::
-printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+ raw_ostream &O) const {
#ifndef __OPTIMIZE__
O << "\t// Implicit def :";
//printOperand(MI, 0);
@@ -647,64 +627,69 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
if (Modifier && !strcmp(Modifier, "add")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
} else {
- if (MI->getOperand(opNum+1).isImm() &&
- MI->getOperand(opNum+1).getImm() == 0)
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
return; // don't print ',0' or '+0'
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
}
}
void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
+ raw_ostream &O, const char *Modifier) {
if (Modifier) {
const MachineOperand &MO = MI->getOperand(opNum);
- int Imm = (int)MO.getImm();
+ int Imm = (int) MO.getImm();
if (!strcmp(Modifier, "volatile")) {
if (Imm)
O << ".volatile";
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
- case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
- case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
- case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
- case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
- case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+ case NVPTX::PTXLdStInstCode::GLOBAL:
+ O << ".global";
+ break;
+ case NVPTX::PTXLdStInstCode::SHARED:
+ O << ".shared";
+ break;
+ case NVPTX::PTXLdStInstCode::LOCAL:
+ O << ".local";
+ break;
+ case NVPTX::PTXLdStInstCode::PARAM:
+ O << ".param";
+ break;
+ case NVPTX::PTXLdStInstCode::CONSTANT:
+ O << ".const";
+ break;
case NVPTX::PTXLdStInstCode::GENERIC:
if (!nvptxSubtarget.hasGenericLdSt())
O << ".global";
break;
default:
- assert("wrong value");
+ llvm_unreachable("Wrong Address Space");
}
- }
- else if (!strcmp(Modifier, "sign")) {
- if (Imm==NVPTX::PTXLdStInstCode::Signed)
+ } else if (!strcmp(Modifier, "sign")) {
+ if (Imm == NVPTX::PTXLdStInstCode::Signed)
O << "s";
- else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+ else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
O << "u";
else
O << "f";
- }
- else if (!strcmp(Modifier, "vec")) {
- if (Imm==NVPTX::PTXLdStInstCode::V2)
+ } else if (!strcmp(Modifier, "vec")) {
+ if (Imm == NVPTX::PTXLdStInstCode::V2)
O << ".v2";
- else if (Imm==NVPTX::PTXLdStInstCode::V4)
+ else if (Imm == NVPTX::PTXLdStInstCode::V4)
O << ".v4";
- }
- else
- assert("unknown modifier");
- }
- else
- assert("unknown modifier");
+ } else
+ llvm_unreachable("Unknown Modifier");
+ } else
+ llvm_unreachable("Empty Modifier");
}
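An illustration of how the modifiers above compose (hedged; the exact operand syntax comes from the instruction definitions, not this function): a load with addsp == GLOBAL, sign == Unsigned and vec == V4 on a 32-bit type prints roughly as

    ld.global.v4.u32 {%r1, %r2, %r3, %r4}, [%rd1];

and, as the GENERIC case shows, subtargets without generic ld/st degrade generic accesses to ".global".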
-void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
- emitLinkageDirective(F,O);
+ emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else
@@ -715,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
O << ";\n";
}
-static bool usedInGlobalVarDef(const Constant *C)
-{
+static bool usedInGlobalVarDef(const Constant *C) {
if (!C)
return false;
@@ -726,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C)
return true;
}
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
const Constant *C = dyn_cast<Constant>(*ui);
if (usedInGlobalVarDef(C))
return true;
@@ -735,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C)
return false;
}
-static bool usedInOneFunc(const User *U, Function const *&oneFunc)
-{
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
if (othergv->getName().str() == "llvm.used")
return true;
@@ -749,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc)
return false;
oneFunc = curFunc;
return true;
- }
- else
+ } else
return false;
}
if (const MDNode *md = dyn_cast<MDNode>(U))
if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
+ (md->getName().str() == "llvm.dbg.sp")))
return true;
-
- for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
- ui!=ue; ++ui) {
+ for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+ ui != ue; ++ui) {
if (usedInOneFunc(*ui, oneFunc) == false)
return false;
}
@@ -795,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
static bool useFuncSeen(const Constant *C,
llvm::DenseMap<const Function *, bool> &seenMap) {
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
if (const Constant *cu = dyn_cast<Constant>(*ui)) {
if (useFuncSeen(cu, seenMap))
return true;
} else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
const BasicBlock *bb = I->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
if (seenMap.find(caller) != seenMap.end())
return true;
}
@@ -812,10 +795,9 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
- for (Module::const_iterator FI=M.begin(), FE=M.end();
- FI!=FE; ++FI) {
+ for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
if (F->isDeclaration()) {
@@ -827,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
emitDeclaration(F, O);
continue;
}
- for (Value::const_use_iterator iter=F->use_begin(),
- iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+ for (Value::const_use_iterator iter = F->use_begin(),
+ iterEnd = F->use_end();
+ iter != iterEnd; ++iter) {
if (const Constant *C = dyn_cast<Constant>(*iter)) {
if (usedInGlobalVarDef(C)) {
// The use is in the initialization of a global variable
@@ -847,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
}
}
- if (!isa<Instruction>(*iter)) continue;
+ if (!isa<Instruction>(*iter))
+ continue;
const Instruction *instr = cast<Instruction>(*iter);
const BasicBlock *bb = instr->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
// If a caller has already been seen, then the caller is
  // appearing in the module before the callee. So print out
@@ -871,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DebugInfoFinder DbgFinder;
DbgFinder.processModule(M);
- unsigned i=1;
+ unsigned i = 1;
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ E = DbgFinder.compile_unit_end();
+ I != E; ++I) {
DICompileUnit DIUnit(*I);
StringRef Filename(DIUnit.getFilename());
StringRef Dirname(DIUnit.getDirectory());
@@ -890,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
+ E = DbgFinder.subprogram_end();
+ I != E; ++I) {
DISubprogram SP(*I);
StringRef Filename(SP.getFilename());
StringRef Dirname(SP.getDirectory());
@@ -906,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
}
-bool NVPTXAsmPrinter::doInitialization (Module &M) {
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -918,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
//bool Result = AsmPrinter::doInitialization(M);
// Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
- .Initialize(OutContext, TM);
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .Initialize(OutContext, TM);
Mang = new Mangler(OutContext, *TM.getDataLayout());
@@ -927,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
emitHeader(M, OS1);
OutStreamer.EmitRawText(OS1.str());
-
// Already commented out
//bool Result = AsmPrinter::doInitialization(M);
-
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
@@ -945,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable*,8> Globals;
- DenseSet<GlobalVariable*> GVVisited;
- DenseSet<GlobalVariable*> GVVisiting;
+ SmallVector<GlobalVariable *, 8> Globals;
+ DenseSet<GlobalVariable *> GVVisited;
+ DenseSet<GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
- assert(GVVisited.size() == M.getGlobalList().size() &&
+ assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
@@ -965,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
+ return false; // success
}
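The ordering contract described above (emit every global only after its dependents), restated as a self-contained post-order DFS; the in-tree version additionally threads a Visiting set to diagnose circular dependencies:

    void visitForEmission(GlobalVariable *GV,
                          SmallVectorImpl<GlobalVariable *> &Order,
                          DenseSet<GlobalVariable *> &Visited) {
      if (!Visited.insert(GV).second)
        return;                                // already scheduled earlier
      DenseSet<GlobalVariable *> Deps;
      for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
        DiscoverDependentGlobals(GV->getOperand(i), Deps);
      for (DenseSet<GlobalVariable *>::iterator I = Deps.begin(),
                                                E = Deps.end();
           I != E; ++I)
        visitForEmission(*I, Order, Visited);  // dependents land first ...
      Order.push_back(GV);                     // ... then GV itself
    }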
-void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
@@ -1008,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
Module::GlobalListType &global_list = M.getGlobalList();
int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable* [n];
+ GlobalVariable **gv_array = new GlobalVariable *[n];
// first, back-up GlobalVariable in gv_array
i = 0;
for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
+ I != E; ++I)
gv_array[i++] = &*I;
// second, empty global_list
@@ -1024,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
bool ret = AsmPrinter::doFinalization(M);
// now we restore global variables
- for (i = 0; i < n; i ++)
+ for (i = 0; i < n; i++)
global_list.insert(global_list.end(), gv_array[i]);
delete[] gv_array;
return ret;
-
//bool Result = AsmPrinter::doFinalization(M);
  // Instead of calling the parent's doFinalization, we may
  // clone the parent's doFinalization and customize here.
@@ -1050,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
// external without init -> .extern
// appending -> not allowed, assert.
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
-{
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+ raw_ostream &O) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
@@ -1078,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
}
}
-
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
bool processDemoted) {
  // Skip metadata
@@ -1130,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
- unsigned sample=CI->getZExtValue();
+ unsigned sample = CI->getZExtValue();
O << " = { ";
- for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
- __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+ for (int i = 0,
+ addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+ i < 3; i++) {
O << "addr_mode_" << i << " = ";
switch (addr) {
- case 0: O << "wrap"; break;
- case 1: O << "clamp_to_border"; break;
- case 2: O << "clamp_to_edge"; break;
- case 3: O << "wrap"; break;
- case 4: O << "mirror"; break;
+ case 0:
+ O << "wrap";
+ break;
+ case 1:
+ O << "clamp_to_border";
+ break;
+ case 2:
+ O << "clamp_to_edge";
+ break;
+ case 3:
+ O << "wrap";
+ break;
+ case 4:
+ O << "mirror";
+ break;
}
- O <<", ";
+ O << ", ";
}
O << "filter_mode = ";
- switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
- case 0: O << "nearest"; break;
- case 1: O << "linear"; break;
- case 2: assert ( 0 && "Anisotropic filtering is not supported");
- default: O << "nearest"; break;
+ switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+ case 0:
+ O << "nearest";
+ break;
+ case 1:
+ O << "linear";
+ break;
+ case 2:
+ assert(0 && "Anisotropic filtering is not supported");
+ default:
+ O << "nearest";
+ break;
}
- if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+ if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
O << ", force_unnormalized_coords = 1";
}
O << " }";
@@ -1195,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
else
O << " .align " << GVar->getAlignment();
-
if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
O << " .";
O << getPTXFundamentalTypeStr(ETy, false);
@@ -1205,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
    // PTX allows variable initialization only for constant and global state
// spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
- O << " = " ;
+ O << " = ";
printScalarConstant(Initializer, O);
}
}
} else {
- unsigned int ElementSize =0;
+ unsigned int ElementSize = 0;
// Although PTX has direct support for struct type and array type and
// LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1229,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
      // PTX allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
- if (!isa<UndefValue>(Initializer) &&
- !Initializer->isNullValue()) {
+ if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (nvptxSubtarget.is64Bit()) {
- O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/8;
- }
- else {
- O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/4;
+ O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 8;
+ } else {
+ O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 4;
}
O << "]";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
O << ElementSize;
O << "]";
}
- O << " = {" ;
+ O << " = {";
aggBuffer.print();
O << "}";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
- }
- else {
+ } else {
O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
}
@@ -1289,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<GlobalVariable *> &gvars = localDecls[f];
- for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
printModuleLevelGV(gvars[i], O, true);
}
@@ -1299,32 +1295,33 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
raw_ostream &O) const {
switch (AddressSpace) {
case llvm::ADDRESS_SPACE_LOCAL:
- O << "local" ;
+ O << "local";
break;
case llvm::ADDRESS_SPACE_GLOBAL:
- O << "global" ;
+ O << "global";
break;
case llvm::ADDRESS_SPACE_CONST:
// This logic should be consistent with that in
    // getCodeAddrSpace() (NVPTXISelDAGToDAG.cpp)
if (nvptxSubtarget.hasGenericLdSt())
- O << "global" ;
+ O << "global";
else
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_SHARED:
- O << "shared" ;
+ O << "shared";
break;
default:
- llvm_unreachable("unexpected address space");
+ report_fatal_error("Bad address space found while emitting PTX");
+ break;
}
}
-std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
- bool useB4PTR) const {
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1348,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
return "f64";
case Type::PointerTyID:
if (nvptxSubtarget.is64Bit())
- if (useB4PTR) return "b64";
- else return "u64";
+ if (useB4PTR)
+ return "b64";
+ else
+ return "u64";
+ else if (useB4PTR)
+ return "b32";
else
- if (useB4PTR) return "b32";
- else return "u32";
+ return "u32";
}
llvm_unreachable("unexpected type");
return NULL;
}
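The pointer branch above, restated as a standalone truth table (same behavior, no new content):

    const char *ptrFundamentalTypeStr(bool is64Bit, bool useB4PTR) {
      if (is64Bit)
        return useB4PTR ? "b64" : "u64"; // 64-bit subtarget
      return useB4PTR ? "b32" : "u32";   // 32-bit subtarget
    }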
-void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
@@ -1382,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
return;
}
- int64_t ElementSize =0;
+ int64_t ElementSize = 0;
// Although PTX has direct support for struct type and array type and LLVM IR
// is very similar to PTX, the LLVM CodeGen does not support for targets that
@@ -1393,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
if (ElementSize) {
- O << itostr(ElementSize) ;
+ O << itostr(ElementSize);
}
O << "]";
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
- return ;
+ return;
}
-
-static unsigned int
-getOpenCLAlignment(const DataLayout *TD,
- Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
return TD->getPrefTypeAlignment(Ty);
@@ -1422,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int numE = VTy->getNumElements();
unsigned int alignE = TD->getPrefTypeAlignment(ETy);
if (numE == 3)
- return 4*alignE;
+ return 4 * alignE;
else
- return numE*alignE;
+ return numE * alignE;
}
const StructType *STy = dyn_cast<StructType>(Ty);
@@ -1432,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
- for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
unsigned int align = getOpenCLAlignment(TD, ETy);
if (align > alignStruct)
@@ -1476,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i==paramIndex) {
+ if (i == paramIndex) {
printParamName(I, paramIndex, O);
return;
}
@@ -1484,10 +1481,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
llvm_unreachable("paramIndex out of bound");
}
-void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
- const AttrListPtr &PAL = F->getAttributes();
+ const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = TM.getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
@@ -1499,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "(\n";
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!first)
O << ",\n";
@@ -1514,15 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
else // Default image is read_only
O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
- }
- else // Should be llvm::isSampler(*I)
+ } else // Should be llvm::isSampler(*I)
O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
- << paramIndex;
+ << paramIndex;
continue;
}
- if (PAL.getParamAttributes(paramIndex+1).
- hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (Ty->isVectorTy()) {
+ // Just print .param .b8 .align <a> .param[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
+ if (align == 0)
+ align = TD->getABITypeAlignment(Ty);
+
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ O << "\t.param .align " << align << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+
+ continue;
+ }
// Just a scalar
const PointerType *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
@@ -1533,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
- switch(addrSpace) {
+ switch (addrSpace) {
default:
O << ".ptr ";
break;
@@ -1548,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << ".ptr .global ";
break;
}
- O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
}
// non-pointer scalar to kernel func
- O << "\t.param ."
- << getPTXFundamentalTypeStr(Ty) << " ";
+ O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
printParamName(I, paramIndex, O);
continue;
}
@@ -1565,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -1581,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// param has byVal attribute. So should be a pointer
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI || isKernelFunc) {
// Just print .param .b8 .align <a> .param[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- unsigned align = PAL.getParamAlignment(paramIndex+1);
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
align = TD->getABITypeAlignment(ETy);
unsigned sz = TD->getTypeAllocSize(ETy);
- O << "\t.param .align " << align
- << " .b8 ";
+ O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;
@@ -1606,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// each vector element.
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -1614,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << "\t.reg .b" << sz << " ";
printParamName(I, paramIndex, O);
- if (j<je-1) O << ",\n";
+ if (j < je - 1)
+ O << ",\n";
++paramIndex;
}
- if (i<e-1)
+ if (i < e - 1)
O << ",\n";
}
--paramIndex;
@@ -1639,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
emitFunctionParamList(F, O);
}
-
-void NVPTXAsmPrinter::
-setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+ const MachineFunction &MF) {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -1654,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
int NumBytes = (int) MFI->getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
- << DEPOTNAME
- << getFunctionNumber() << "[" << NumBytes << "];\n";
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
if (nvptxSubtarget.is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
- }
- else {
+ } else {
O << "\t.reg .b32 \t%SP;\n";
O << "\t.reg .b32 \t%SPL;\n";
}
@@ -1672,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
// register number and the per class virtual register number.
// We use the per class virtual register number in the ptx output.
unsigned int numVRs = MRI->getNumVirtRegs();
- for (unsigned i=0; i< numVRs; i++) {
+ for (unsigned i = 0; i < numVRs; i++) {
unsigned int vr = TRI->index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
int n = regmap.size();
- regmap.insert(std::make_pair(vr, n+1));
+ regmap.insert(std::make_pair(vr, n + 1));
}
// Emit register declarations
@@ -1721,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
OutStreamer.EmitRawText(O.str());
}
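A worked example of the prologue text assembled above, assuming DEPOTNAME expands to "__local_depot", function number 0, a 16-byte frame and max alignment 8 on a 64-bit subtarget:

    .local .align 8 .b8 __local_depot0[16];
    .reg .b64 %SP;
    .reg .b64 %SPL;

With an empty frame (NumBytes == 0) none of these lines are emitted.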
-
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
- APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
bool ignored;
unsigned int numHex;
const char *lead;
- if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+ if (Fp->getType()->getTypeID() == Type::FloatTyID) {
numHex = 8;
lead = "0f";
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
} else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
numHex = 16;
lead = "0d";
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
} else
llvm_unreachable("unsupported fp type");
@@ -1779,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-
void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
@@ -1787,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
- if (s<Bytes)
+ if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
return;
@@ -1798,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
- if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+ if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c =
(unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
- } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
- short int16 =
- (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
- ptr = (unsigned char*)&int16;
+ } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+ short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
- } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt =
- dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
- Cexpr, TD))) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
}
@@ -1831,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
llvm_unreachable("unsupported integer const type");
- } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
}
@@ -1860,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type* Ty = CFP->getType();
+ const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
- float float32 = (float)CFP->getValueAPF().convertToFloat();
- ptr = (unsigned char*)&float32;
+ float float32 = (float) CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char *)&float32;
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (Ty == Type::getDoubleTy(CPV->getContext())) {
double float64 = CFP->getValueAPF().convertToDouble();
- ptr = (unsigned char*)&float64;
+ ptr = (unsigned char *)&float64;
aggBuffer->addBytes(ptr, 8, Bytes);
- }
- else {
+ } else {
llvm_unreachable("unsupported fp const type");
}
break;
@@ -1878,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::PointerTyID: {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- }
- else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
@@ -1895,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
isa<ConstantStruct>(CPV)) {
int ElementSize = TD->getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
- if ( Bytes > ElementSize )
- aggBuffer->addZeros(Bytes-ElementSize);
- }
- else if (isa<ConstantAggregateZero>(CPV))
+ if (Bytes > ElementSize)
+ aggBuffer->addZeros(Bytes - ElementSize);
+ } else if (isa<ConstantAggregateZero>(CPV))
aggBuffer->addZeros(Bytes);
else
llvm_unreachable("Unexpected Constant type");
@@ -1924,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
}
if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(CPV)) {
+ dyn_cast<ConstantDataSequential>(CPV)) {
if (CDS->getNumElements())
for (unsigned i = 0; i < CDS->getNumElements(); ++i)
bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
@@ -1932,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
return;
}
-
if (isa<ConstantStruct>(CPV)) {
if (CPV->getNumOperands()) {
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
- if ( i == (e - 1))
+ if (i == (e - 1))
Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST)
- - TD->getStructLayout(ST)->getElementOffset(i);
+ TD->getTypeAllocSize(ST) -
+ TD->getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
- TD->getStructLayout(ST)->getElementOffset(i);
- bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
- aggBuffer);
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
return;
@@ -1956,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-
bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
- if (PI != TypeNameMap.end() &&
- (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
+ if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
return true;
return false;
@@ -1974,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
@@ -1993,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
+ return true; // Unknown modifier
O << '[';
printMemOperand(MI, OpNo, O);
@@ -2008,71 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
-{
- switch(MI.getOpcode()) {
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
- case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0:
- case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32:
- case NVPTX::CallArgF64: case NVPTX::CallArgI16:
- case NVPTX::CallArgI32: case NVPTX::CallArgI32imm:
- case NVPTX::CallArgI64: case NVPTX::CallArgI8:
- case NVPTX::CallArgParam: case NVPTX::CallVoidInst:
- case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End:
+ case NVPTX::CallArgBeginInst:
+ case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1:
+ case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64:
+ case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32:
+ case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64:
+ case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam:
+ case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg:
+ case NVPTX::Callseq_End:
case NVPTX::CallVoidInstReg64:
- case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst:
- case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
- case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
- case NVPTX::StoreParamF32: case NVPTX::StoreParamF64:
- case NVPTX::StoreParamI16: case NVPTX::StoreParamI32:
- case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
- case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
- case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
- case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
- case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
- case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
- case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
- case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
- case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
- case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
- case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
- case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
- case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
- case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
- case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
- case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
- case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
- case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
- case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
- case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
- case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
- case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
- case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
- case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
- case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
- case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
- case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
- case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam:
- case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64:
- case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32:
- case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8:
- case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
- case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
- case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
- case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
- case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
- case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
- case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
- case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
- case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
- case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
- case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
- case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
- case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
+ case NVPTX::DeclareParamInst:
+ case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst:
+ case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst:
+ case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32:
+ case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16:
+ case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64:
+ case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8:
+ case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16:
+ case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreRetvalF32:
+ case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16:
+ case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64:
+ case NVPTX::StoreRetvalI8:
+ case NVPTX::LastCallArgF32:
+ case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16:
+ case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm:
+ case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8:
+ case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32:
+ case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16:
+ case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64:
+ case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32:
+ case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16:
+ case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64:
+ case NVPTX::LoadParamRegI8:
+ case NVPTX::PrototypeInst:
+ case NVPTX::DBG_VALUE:
return true;
}
return false;
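  // All of the opcodes listed above are call/param lowering pseudo
  // instructions that expand into PTX directives rather than ordinary
  // statements, so emitting source-line info for them would only produce
  // misleading .loc output.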
@@ -2084,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader * reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename.str());
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2098,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
this->OutStreamer.EmitRawText(Twine(temp.str()));
}
-
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
- if (reader == NULL) {
- reader = new LineReader(filename);
+ if (reader == NULL) {
+ reader = new LineReader(filename);
}
if (reader->fileName() != filename) {
delete reader;
- reader = new LineReader(filename);
+ reader = new LineReader(filename);
}
return reader;
}
-
-std::string
-LineReader::readLine(unsigned lineNum) {
+std::string LineReader::readLine(unsigned lineNum) {
if (lineNum < theCurLine) {
theCurLine = 0;
- fstr.seekg(0,std::ios::beg);
+ fstr.seekg(0, std::ios::beg);
}
while (theCurLine < lineNum) {
- fstr.getline(buff,500);
+ fstr.getline(buff, 500);
theCurLine++;
}
return buff;
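
// A minimal standalone sketch of the same seek-and-replay scheme used by
// readLine above (illustrative only, not part of the class; note that a
// stream that has already hit EOF would also need fstr.clear() before the
// seekg() can succeed):
//
//   std::string readLineAt(std::ifstream &fstr, unsigned &theCurLine,
//                          unsigned lineNum) {
//     char buff[512];
//     buff[0] = '\0';
//     if (lineNum < theCurLine) {
//       theCurLine = 0;
//       fstr.seekg(0, std::ios::beg);
//     }
//     while (theCurLine < lineNum) {
//       fstr.getline(buff, 512);
//       theCurLine++;
//     }
//     return buff;
//   }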
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6488b1442580..6dc9fc0ffeff 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -16,19 +16,19 @@
#define NVPTXASMPRINTER_H
#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
#include "NVPTXSubtarget.h"
-#include "llvm/Function.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
#include <fstream>
// The ptx syntax and format are very different from those usually seen in a .s
@@ -43,15 +43,15 @@
// This is defined in AsmPrinter.cpp.
// Used to process the constant expressions in initializers.
namespace nvptx {
-const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
- llvm::AsmPrinter &AP) ;
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
}
namespace llvm {
class LineReader {
private:
- unsigned theCurLine ;
+ unsigned theCurLine;
std::ifstream fstr;
char buff[512];
std::string theFileName;
@@ -63,17 +63,12 @@ public:
theFileName = filename;
}
std::string fileName() { return theFileName; }
- ~LineReader() {
- fstr.close();
- }
+ ~LineReader() { fstr.close(); }
std::string readLine(unsigned line);
};
-
-
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
-
class AggBuffer {
// Used to buffer the emitted string for initializing global
// aggregates.
@@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
+ unsigned size; // size of the buffer in bytes
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
@@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- :O(_O),AP(_AP) {
+ : O(_O), AP(_AP) {
buffer = new unsigned char[_size];
size = _size;
curpos = 0;
numSymbols = 0;
}
- ~AggBuffer() {
- delete [] buffer;
- }
+ ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
- assert((curpos+Num) <= size);
- assert((curpos+Bytes) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ assert((curpos + Bytes) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = Ptr[i];
- curpos ++;
+ curpos++;
}
- for ( int i=Num; i < Bytes ; ++i) {
+ for (int i = Num; i < Bytes; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
unsigned addZeros(int Num) {
- assert((curpos+Num) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
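    // Usage sketch (hypothetical values): buffering a 4-byte element of
    // which only the low 2 bytes are significant, little-endian:
    //
    //   unsigned char bytes[2] = { 0x34, 0x12 };
    //   aggBuffer->addBytes(bytes, 2, 4); // stores 34 12 00 00
    //   aggBuffer->addZeros(4);           // zero-fills four more bytes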
@@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
void print() {
if (numSymbols == 0) {
// print out in bytes
- for (unsigned i=0; i<size; i++) {
+ for (unsigned i = 0; i < size; i++) {
if (i)
O << ", ";
- O << (unsigned int)buffer[i];
+          O << (unsigned int)buffer[i];
}
} else {
// print out in 4-bytes or 8-bytes
@@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nBytes = 4;
if (AP.nvptxSubtarget.is64Bit())
nBytes = 8;
- for (pos=0; pos<size; pos+=nBytes) {
+ for (pos = 0; pos < size; pos += nBytes) {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
@@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- }
- else if (ConstantExpr *Cexpr =
- dyn_cast<ConstantExpr>(v)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
nSym++;
if (nSym >= numSymbols)
- nextSymbolPos = size+1;
+ nextSymbolPos = size + 1;
else
nextSymbolPos = symbolPosInBuffer[nSym];
- } else
- if (nBytes == 4)
- O << *(unsigned int*)(buffer+pos);
- else
- O << *(unsigned long long*)(buffer+pos);
+ } else if (nBytes == 4)
+ O << *(unsigned int *)(buffer + pos);
+ else
+ O << *(unsigned long long *)(buffer + pos);
}
}
}
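      // Net effect: a purely numeric buffer prints as a comma-separated
      // byte list ("0, 1, 2, ..."), while a buffer holding symbol addresses
      // prints in pointer-sized words, substituting the mangled symbol name
      // (or lowered constant expression) at each recorded symbol position.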
@@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
virtual void emitSrcInText(StringRef filename, unsigned line);
-private :
- virtual const char *getPassName() const {
- return "NVPTX Assembly Printer";
- }
+private:
+ virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
@@ -207,31 +195,28 @@ private :
void printGlobalVariable(const GlobalVariable *GVar);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
- void printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier, raw_ostream &O);
+ const char *Modifier = 0);
+ void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+ raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
- bool=false);
+ void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitHeader(Module &M, raw_ostream &O);
- void emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const;
+ void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF,
- unsigned &FrameSize);
+ void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
bool isImageType(const Type *Ty);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -269,17 +254,16 @@ private:
void recordAndEmitFilenames(Module &);
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
- void emitPTXAddressSpace(unsigned int AddressSpace,
- raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
- void printScalarConstant(Constant *CPV, raw_ostream &O) ;
- void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+ void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+ std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+ void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
- void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+ void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
@@ -289,10 +273,9 @@ private:
LineReader *reader;
LineReader *getReader(std::string);
public:
- NVPTXAsmPrinter(TargetMachine &TM,
- MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
VRidGlobal2LocalMap = NULL;
reader = NULL;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index a9abc00bf3f6..6533da5102b0 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -17,17 +17,15 @@
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
- return true;
-}
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFrameInfo()->hasStackObjects()) {
@@ -42,35 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
if (is64bit) {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
} else {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
}
- }
- else {
+ } else {
// mov %SP, %depot;
if (is64bit)
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
else
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
}
}
}
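// The sequences above emit (roughly) the PTX frame setup
//   mov.u{32,64}        %SPL, %depot;
//   cvta.local.u{32,64} %SP,  %SPL;
// giving %SP the generic-address view of the local depot; the plain-mov
// branch (its guarding condition sits above this hunk) instead aliases
// %SP to %depot directly.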
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+ MachineBasicBlock &MBB) const {}
+
+// This function eliminates the ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo
+// instructions.
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+  // Simply discard the ADJCALLSTACKDOWN / ADJCALLSTACKUP instructions.
+ MBB.erase(I);
}
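// Erasing the markers is safe because PTX calls pass arguments through
// .param space rather than by adjusting a physical stack pointer, so
// there is no real stack bookkeeping for these pseudos to expand into.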
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index ee87b3997e78..819f1dd3f4be 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,6 @@
#include "llvm/Target/TargetFrameLowering.h"
-
namespace llvm {
class NVPTXTargetMachine;
@@ -26,13 +25,16 @@ class NVPTXFrameLowering : public TargetFrameLowering {
public:
explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- tm(_tm), is64bit(_is64bit) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+ is64bit(_is64bit) {}
virtual bool hasFP(const MachineFunction &MF) const;
virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4e92f0e785fe..e862988c85d1 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,42 +11,36 @@
//
//===----------------------------------------------------------------------===//
-
-#include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
#include "NVPTXISelDAGToDAG.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/GlobalValue.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-isel"
using namespace llvm;
-
-static cl::opt<bool>
-UseFMADInstruction("nvptx-mad-enable",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
- cl::init(false));
+static cl::opt<bool> UseFMADInstruction(
+ "nvptx-mad-enable", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
static cl::opt<int>
-FMAContractLevel("nvptx-fma-level",
- cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
+                 " 1: do it 2: do it aggressively)"),
+ cl::init(2));
-
-static cl::opt<int>
-UsePrecDivF32("nvptx-prec-divf32",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
- cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore,
+    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
+             " IEEE Compliant F32 div.rnd if available."),
+ cl::init(2));
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
return new NVPTXDAGToDAGISel(TM, OptLevel);
}
-
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
-: SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>())
-{
+ : SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// Always do fma.f32 fpcontract if the target supports the instruction.
// Always do fma.f64 fpcontract if the target supports the instruction.
  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
// support fma.f32.
doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
- doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel>=1);
- doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel>=1);
- doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel==2);
- doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel==2);
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+ doFMAF32AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+ doFMAF64AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
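  // Net effect for OptLevel > 0 (summary of the code above):
  //   nvptx-fma-level == 0 : no fma contraction (doFMAF32/F64 stay false)
  //   nvptx-fma-level >= 1 : doFMAF32/F64 when the subtarget has fma.f32/f64
  //   nvptx-fma-level == 2 : additionally doFMAF32AGG / doFMAF64AGG
  //   -nvptx-mad-enable    : doFMADF32, but only when fma.f32 is unavailable
  // allowFMA is set whenever contraction (level >= 1) or MAD is requested.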
@@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
-SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
- return NULL; // Already selected.
+ return NULL; // Already selected.
SDNode *ResNode = NULL;
switch (N->getOpcode()) {
@@ -105,29 +95,48 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default:
+ break;
}
if (ResNode)
return ResNode;
return SelectCode(N);
}
-
-static unsigned int
-getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
-{
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+ const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getSrcValue();
if (!Src)
return NVPTX::PTXLdStInstCode::LOCAL;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
- case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
- case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
- case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_LOCAL:
+ return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED:
+ return NVPTX::PTXLdStInstCode::SHARED;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
return NVPTX::PTXLdStInstCode::CONSTANT;
- case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
- case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_GENERIC:
+ return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM:
+ return NVPTX::PTXLdStInstCode::PARAM;
case llvm::ADDRESS_SPACE_CONST:
// If the arch supports generic address space, translate it to GLOBAL
// for correctness.
@@ -138,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
return NVPTX::PTXLdStInstCode::GLOBAL;
else
return NVPTX::PTXLdStInstCode::CONSTANT;
- default: break;
+ default:
+ break;
}
}
return NVPTX::PTXLdStInstCode::LOCAL;
}
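// Mapping implemented above, in summary:
//   ADDRESS_SPACE_LOCAL          -> LOCAL
//   ADDRESS_SPACE_GLOBAL         -> GLOBAL
//   ADDRESS_SPACE_SHARED         -> SHARED
//   ADDRESS_SPACE_CONST_NOT_GEN  -> CONSTANT
//   ADDRESS_SPACE_GENERIC        -> GENERIC
//   ADDRESS_SPACE_PARAM          -> PARAM
//   ADDRESS_SPACE_CONST          -> GLOBAL when generic addressing is
//                                   supported (for correctness), CONSTANT
//                                   otherwise
//   no source value / default    -> LOCAL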
-
-SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD= NULL;
+ SDNode *NVPTXLD = NULL;
// do not support pre/post inc/dec
if (LD->isIndexed())
@@ -189,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -208,121 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N1.getNode(), N1, Base, Offset):
- SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N1.getNode(), N1, Base, Offset):
- SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
- default: return NULL;
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- }
- else {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
- default: return NULL;
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
}
if (NVPTXLD != NULL) {
@@ -334,7 +388,590 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
-SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+  //   - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType = cast<ConstantSDNode>(
+ N->getOperand(N->getNumOperands() - 1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ break;
+ case NVPTXISD::LoadV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ break;
+ default:
+ return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
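+// All LDV machine nodes built above share the operand layout
+//   { isVolatile, codeAddrSpace, vecType, fromType, fromTypeWidth,
+//     <address operands>, chain }
+// where the address is a single symbol/register operand (7 operands in all)
+// or a base+offset pair (8 operands in all).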
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
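+// The LDGV*/LDUV* nodes map onto the ld.global.nc (load via the
+// non-coherent/texture cache) and ldu (load uniform) intrinsic opcodes; the
+// machine node needs only { addr, chain } because the .global state space
+// is implied by the opcode itself.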
+
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
@@ -375,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
// - for integer type, always use 'u'
//
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
unsigned int toType;
if (ScalarVT.isFloatingPoint())
toType = NVPTX::PTXLdStInstCode::Float;
@@ -394,124 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Addr, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N2.getNode(), N2, Base, Offset):
- SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N2.getNode(), N2, Base, Offset):
- SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
- default: return NULL;
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
} else {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- N2, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
}
if (NVPTXST != NULL) {
@@ -523,12 +1202,388 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return NVPTXST;
}
+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *ST;
+ EVT EltVT = Op1.getValueType();
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT StoreVT = MemSD->getMemoryVT();
+
+ // Address Space Setting
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+ report_fatal_error("Cannot store to pointer that points to constant "
+ "memory space");
+ }
+
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ assert(StoreVT.isSimple() && "Store value is not simple");
+ MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToType;
+ if (ScalarVT.isFloatingPoint())
+ ToType = NVPTX::PTXLdStInstCode::Float;
+ else
+ ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ SmallVector<SDValue, 12> StOps;
+ SDValue N2;
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::StoreV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ N2 = N->getOperand(3);
+ break;
+ case NVPTXISD::StoreV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ StOps.push_back(N->getOperand(3));
+ StOps.push_back(N->getOperand(4));
+ N2 = N->getOperand(5);
+ break;
+ default:
+ return NULL;
+ }
+
+ StOps.push_back(getI32Imm(IsVolatile));
+ StOps.push_back(getI32Imm(CodeAddrSpace));
+ StOps.push_back(getI32Imm(VecType));
+ StOps.push_back(getI32Imm(ToType));
+ StOps.push_back(getI32Imm(ToTypeWidth));
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Addr);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(N2);
+ }
+
+ StOps.push_back(Chain);
+
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return ST;
+}
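A note on the shape of SelectStoreVector (and SelectStore above it): the PTX
addressing modes are tried in a fixed order, direct address (avar), then
symbol plus immediate (asi), then register plus immediate (ari), with plain
register (areg) as the fallback, via SelectDirectAddr, SelectADDRsi(64) and
SelectADDRri(64). A minimal standalone sketch of that cascade; AddrMode,
AddrInfo and selectAddrMode are illustrative names, not backend code:

    // Hypothetical mirror of the four PTX store addressing modes.
    enum class AddrMode { Avar, Asi, Ari, Areg };

    struct AddrInfo {
      bool IsSymbol;        // models SelectDirectAddr succeeding
      bool HasConstOffset;  // models an ISD::ADD with a constant operand
    };

    // Most specific form first, plain register last, matching the order
    // in which the switches above can be reached.
    AddrMode selectAddrMode(const AddrInfo &A) {
      if (A.IsSymbol && !A.HasConstOffset)
        return AddrMode::Avar; // [globalsym]
      if (A.IsSymbol)
        return AddrMode::Asi;  // [globalsym+imm]
      if (A.HasConstOffset)
        return AddrMode::Ari;  // [reg+imm]
      return AddrMode::Areg;   // [reg]
    }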
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// Return true if TGA or ES.
- if (N.getOpcode() == ISD::TargetGlobalAddress
- || N.getOpcode() == ISD::TargetExternalSymbol) {
+ if (N.getOpcode() == ISD::TargetGlobalAddress ||
+ N.getOpcode() == ISD::TargetExternalSymbol) {
Address = N;
return true;
}
@@ -546,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
}
// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- SDValue base=Addr.getOperand(0);
+ SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
return true;
@@ -574,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
}
// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Offset = CurDAG->getTargetConstant(0, mvt);
@@ -584,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // direct calls.
+ return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
@@ -592,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
if (FrameIndexSDNode *FIN =
- dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
// Constant offset from frame ref.
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
@@ -624,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
// (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Src = mN->getSrcValue();
- }
- else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
Src = mN->getSrcValue();
}
if (!Src)
@@ -637,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
-bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
+ default:
+ return true;
+ case 'm': // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
@@ -666,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
// pattern matcher inserts a bunch of IMOVi8rr to convert
// the imm to i8imm, and this causes instruction selection
// to fail.
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
- SDValue &Retval) {
- if (!(N.getOpcode() == ISD::UNDEF) &&
- !(N.getOpcode() == ISD::Constant))
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
return false;
if (N.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ccd69b29dd42..70e8e464297d 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,8 +18,8 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Intrinsics.h"
using namespace llvm;
namespace {
@@ -64,16 +64,18 @@ public:
const NVPTXSubtarget &Subtarget;
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ virtual bool SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
private:
- // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
- SDNode* SelectLoad(SDNode *N);
- SDNode* SelectStore(SDNode *N);
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectLoadVector(SDNode *N);
+ SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectStoreVector(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
@@ -96,7 +98,6 @@ private:
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
-
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f1a99d77be9d..6e01a5a82071 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -11,30 +11,29 @@
//
//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
#include "NVPTXISelLowering.h"
+#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/MC/MCSectionELF.h"
#include <sstream>
#undef DEBUG_TYPE
@@ -44,28 +43,39 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
-static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
- cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
- cl::init(true));
+static cl::opt<bool> sched4reg(
+ "nvptx-sched4reg",
+ cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
-static cl::opt<bool>
-sched4reg("nvptx-sched4reg",
- cl::desc("NVPTX Specific: schedule for register pressue"),
- cl::init(false));
+static bool IsPTXVectorType(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::v2i8:
+ case MVT::v4i8:
+ case MVT::v2i16:
+ case MVT::v4i16:
+ case MVT::v2i32:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return true;
+ }
+}
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
-: TargetLowering(TM, new NVPTXTargetObjectFile()),
- nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
// than generating calls to memset, memcpy or memmove.
- maxStoresPerMemset = (unsigned)0xFFFFFFFF;
- maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
- maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -87,82 +97,51 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- if (RetainVectorOperands) {
- addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
- addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
- addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
- addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
- addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
- addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
- addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
- addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
- addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
- addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
- }
-
// Operations not directly supported by NVPTX.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (nvptxSubtarget.hasROT64()) {
- setOperationAction(ISD::ROTL , MVT::i64, Legal);
- setOperationAction(ISD::ROTR , MVT::i64, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i64, Expand);
- setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
if (nvptxSubtarget.hasROT32()) {
- setOperationAction(ISD::ROTL , MVT::i32, Legal);
- setOperationAction(ISD::ROTR , MVT::i32, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i32, Expand);
- setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
}
- setOperationAction(ISD::ROTL , MVT::i16, Expand);
- setOperationAction(ISD::ROTR , MVT::i16, Expand);
- setOperationAction(ISD::ROTL , MVT::i8, Expand);
- setOperationAction(ISD::ROTR , MVT::i8, Expand);
- setOperationAction(ISD::BSWAP , MVT::i16, Expand);
- setOperationAction(ISD::BSWAP , MVT::i32, Expand);
- setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
// Indirect branch is not supported.
// This also disables Jump Table creation.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
- setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// We want to legalize constant-related memmove and memcpy
// intrinsics.
@@ -185,92 +164,114 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::i8, MVT::i1, Expand);
// This is legal in NVPTX
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
// TRAP can be lowered to PTX trap
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
-
- // By default, CONCAT_VECTORS is implemented via store/load
- // through stack. It is slow and uses local memory. We need
- // to custom-lowering them.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
-
- // Expand vector int to float and float to int conversions
- // - For SINT_TO_FP and UINT_TO_FP, the src type
- // (Node->getOperand(0).getValueType())
- // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
- // the dest type (Node->getValueType(0)) is used.
- //
- // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
- // case, and
- // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
- //
- // That is why v4i32 or v2i32 are used here.
- //
- // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
- // (LegalizeVectorOps.cpp).
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Register custom handling for vector loads/stores
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
+ if (IsPTXVectorType(VT)) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+ }
+ }
// Now deduce the information based on the above-mentioned
// actions.
computeRegisterProperties();
}
-
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
- case NVPTXISD::CALL: return "NVPTXISD::CALL";
- case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
- case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
- case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
- case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
+ default:
+ return 0;
+ case NVPTXISD::CALL:
+ return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG:
+ return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper:
+ return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin:
+ return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam:
+ return "NVPTXISD::DeclareParam";
case NVPTXISD::DeclareScalarParam:
return "NVPTXISD::DeclareScalarParam";
- case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
- case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
- case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
- case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
- case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
- case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
- case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
- case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
- case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
- case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
- case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
- case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
- case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
- case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
- case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
- case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
- case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
- case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
- case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
- case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
- case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
- case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
- case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
- case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::DeclareRet:
+ return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam:
+ return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall:
+ return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam:
+ return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam:
+ return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32:
+ return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32:
+ return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam:
+ return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin:
+ return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg:
+ return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg:
+ return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd:
+ return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid:
+ return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal:
+ return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol:
+ return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype:
+ return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam:
+ return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval:
+ return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval:
+ return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval:
+ return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam:
+ return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN:
+ return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin:
+ return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd:
+ return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2:
+ return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4:
+ return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2:
+ return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4:
+ return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2:
+ return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4:
+ return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2:
+ return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4:
+ return "NVPTXISD::StoreV4";
}
}
+bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT == MVT::i1;
+}
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
@@ -280,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
-std::string NVPTXTargetLowering::getPrototype(Type *retTy,
- const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment) const {
+std::string NVPTXTargetLowering::getPrototype(
+ Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
@@ -299,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
- }
- else {
+ if (size < 32)
+ size = 32;
+ } else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " _";
- }
- else if (isa<PointerType>(retTy))
- O << ".param .b" << getPointerTy().getSizeInBits()
- << " _";
+ } else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits() << " _";
else {
if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << totalsz << "]";
- }
- else {
- assert(false &&
- "Unknown return type");
+ O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ } else {
+ assert(false && "Unknown return type");
}
}
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -354,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " _";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -372,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
bool first = true;
MVT thePointerTy = getPointerTy();
- for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
const Type *Ty = Args[i].Ty;
if (!first) {
O << ", ";
@@ -383,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -397,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
continue;
}
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI) {
unsigned align = Outs[i].Flags.getByValAlign();
unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
- O << ".param .align " << align
- << " .b8 ";
+ O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
continue;
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -421,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " ";
O << "_";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
}
- if (i<e-1)
+ if (i < e - 1)
O << ", ";
}
continue;
@@ -438,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
return O.str();
}
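For orientation, the fragments assembled above form the parameter list of the
prototype used for indirect calls. Under the ABI path (sm_20 and later) on a
64-bit target, a hypothetical callee float f(int, float *) would come out
roughly as:

    .param .b32 _          // return value: 32-bit float
    .param .b32 _          // int argument (sub-32-bit integers are widened)
    .param .b64 _          // pointer argument, at pointer width

This is reconstructed from the emission logic above, not captured compiler
output, so treat the exact spelling as approximate.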
-
-SDValue
-NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- ArgListTy &Args = CLI.Args;
- Type *retTy = CLI.RetTy;
- ImmutableCallSite *CS = CLI.CS;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ ArgListTy &Args = CLI.Args;
+ Type *retTy = CLI.RetTy;
+ ImmutableCallSite *CS = CLI.CS;
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true));
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
SDValue InFlag = Chain.getValue(1);
assert((Outs.size() == Args.size()) &&
@@ -466,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned paramCount = 0;
// Declare the .params or .reg needed to pass values
// to the function
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT VT = Outs[i].VT;
if (Outs[i].Flags.isByVal() == false) {
@@ -477,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isABI)
isReg = 0;
unsigned sz = VT.getSizeInBits();
- if (VT.isInteger() && (sz < 32)) sz = 32;
+ if (VT.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(isReg, MVT::i32),
- InFlag };
+ DAG.getConstant(isReg, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+ DAG.getConstant(0, MVT::i32), OutVals[i],
+ InFlag };
unsigned opcode = NVPTXISD::StoreParam;
if (isReg)
@@ -509,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
- assert(PTy &&
- "Type of a byval parameter should be pointer");
+ assert(PTy && "Type of a byval parameter should be pointer");
ComputeValueVTs(*this, PTy->getElementType(), vtparts);
if (isABI) {
@@ -520,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The ByValAlign in the Outs[i].Flags is always set at this point, so we
// don't need to
// worry about natural alignment or not. See TargetLowering::LowerCallTo()
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+ InFlag
+ };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- theVal, InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, 5);
InFlag = Chain.getValue(1);
- curOffset += sz/8;
+ curOffset += sz / 8;
}
}
++paramCount;
@@ -562,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Non-abi, struct or vector
// Declare a bunch of .reg .b<size> .param<n>
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(0, MVT::i32), theVal,
@@ -610,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
// individual .reg .b<size> func_retval<0..> for non ABI
unsigned resultsz = 0;
- for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = resvtparts[i];
if (resvtparts[i].isVector()) {
elems = resvtparts[i].getVectorNumElements();
elemtype = resvtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (isABI == false) {
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
- }
- else {
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ } else {
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
}
if (isABI == false) {
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -641,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (isABI) {
if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
- retTy->isPointerTy() ) {
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32 bits wide
if (resultsz < 32)
resultsz = 32;
@@ -652,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
- }
- else {
+ } else {
if (Func) { // direct call
if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
@@ -663,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
}
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
- MVT::i32),
- DAG.getConstant(resultsz/8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = { Chain,
+ DAG.getConstant(retAlignment, MVT::i32),
+ DAG.getConstant(resultsz / 8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
@@ -684,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// INLINEASM SDNode.
SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
- const char *asmstr = nvTM->getManagedStrPool()->
- getManagedString(proto_string.c_str())->c_str();
- SDValue InlineAsmOps[] = { Chain,
- DAG.getTargetExternalSymbol(asmstr,
- getPointerTy()),
- DAG.getMDNode(0),
- DAG.getTargetConstant(0, MVT::i32), InFlag };
+ const char *asmstr = nvTM->getManagedStrPool()
+ ->getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = {
+ Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+ DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+ };
Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrintCallOps[] = { Chain,
- DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
- : retCount, MVT::i32),
- InFlag };
- Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
- PrintCallVTs, PrintCallOps, 3);
+ SDValue PrintCallOps[] = {
+ Chain,
+ DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+ dl, PrintCallVTs, PrintCallOps, 3);
InFlag = Chain.getValue(1);
// Ops to print out the function name
@@ -717,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallArgBeginOps, 2);
InFlag = Chain.getValue(1);
- for (unsigned i=0, e=paramCount; i!=e; ++i) {
+ for (unsigned i = 0, e = paramCount; i != e; ++i) {
unsigned opcode;
- if (i==(e-1))
+ if (i == (e - 1))
opcode = NVPTXISD::LastCallArg;
else
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(i, MVT::i32),
- InFlag };
+ DAG.getConstant(i, MVT::i32), InFlag };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
InFlag };
- Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
- 3);
+ Chain =
+ DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain,
- DAG.getConstant(uniqueCallSite, MVT::i32),
+ SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
InFlag = Chain.getValue(1);
@@ -751,33 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Ins.size() > 0) {
if (isABI) {
unsigned resoffset = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = Ins[i].VT.getSizeInBits();
- if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
- std::vector<EVT> LoadRetVTs;
- LoadRetVTs.push_back(Ins[i].VT);
- LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ if (Ins[i].VT.isInteger() && (sz < 8))
+ sz = 8;
+ EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
+ SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resoffset, MVT::i32), InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
- &LoadRetOps[0], LoadRetOps.size());
+ LoadRetOps, array_lengthof(LoadRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
InVals.push_back(retval);
- resoffset += sz/8;
+ resoffset += sz / 8;
}
- }
- else {
+ } else {
SmallVector<EVT, 16> resvtparts;
ComputeValueVTs(*this, retTy, resvtparts);
assert(Ins.size() == resvtparts.size() &&
"Unexpected number of return values in non-ABI case");
unsigned paramNum = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(EVT(Ins[i].VT) == resvtparts[i] &&
"Unexpected EVT type in non-ABI case");
unsigned numelems = 1;
@@ -787,17 +773,13 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
elemtype = Ins[i].VT.getVectorElementType();
}
std::vector<SDValue> tempRetVals;
- for (unsigned j=0; j<numelems; ++j) {
- std::vector<EVT> MoveRetVTs;
- MoveRetVTs.push_back(elemtype);
- MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> MoveRetOps;
- MoveRetOps.push_back(Chain);
- MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
- MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
- MoveRetOps.push_back(InFlag);
+ for (unsigned j = 0; j < numelems; ++j) {
+ EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
+ SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(paramNum, MVT::i32),
+ InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
- &MoveRetOps[0], MoveRetOps.size());
+ MoveRetOps, array_lengthof(MoveRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
tempRetVals.push_back(retval);
@@ -811,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
}
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true),
- DAG.getIntPtrConstant(uniqueCallSite+1, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, true),
InFlag);
uniqueCallSite++;
@@ -826,76 +807,183 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as LegalizeOp() does in llvm 2.5.
-SDValue NVPTXTargetLowering::
-LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
SmallVector<SDValue, 8> Ops;
unsigned NumOperands = Node->getNumOperands();
- for (unsigned i=0; i < NumOperands; ++i) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
SDValue SubOp = Node->getOperand(i);
EVT VVT = SubOp.getNode()->getValueType(0);
EVT EltVT = VVT.getVectorElementType();
unsigned NumSubElem = VVT.getVectorNumElements();
- for (unsigned j=0; j < NumSubElem; ++j) {
+ for (unsigned j = 0; j < NumSubElem; ++j) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
DAG.getIntPtrConstant(j)));
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+ Ops.size());
}
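The expansion is simply "pull every element out of every operand, in order,
then rebuild one wide vector", instead of the default round trip through a
stack slot. In scalar C++ terms (illustrative only):

    #include <vector>

    // Sketch of LowerCONCAT_VECTORS: gather the elements of all sub-vectors
    // (EXTRACT_VECTOR_ELT), then build a single result (BUILD_VECTOR).
    std::vector<float> concatVectors(
        const std::vector<std::vector<float>> &Subs) {
      std::vector<float> Out;
      for (const auto &Sub : Subs)   // each operand of CONCAT_VECTORS
        for (float Elt : Sub)        // extract each element
          Out.push_back(Elt);        // feed the rebuilt vector
      return Out;
    }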
-SDValue NVPTXTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- case ISD::RETURNADDR: return SDValue();
- case ISD::FRAMEADDR: return SDValue();
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return Op;
+ case ISD::RETURNADDR:
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return SDValue();
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return Op;
case ISD::BUILD_VECTOR:
case ISD::EXTRACT_SUBVECTOR:
return Op;
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG);
+ case ISD::LOAD:
+ return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::i1)
+ return LowerLOADi1(Op, DAG);
+ else
+ return SDValue();
+}
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
-SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
- assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
- SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- LD->getAlignment());
+ SDValue newLD =
+ DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
- SDValue Ops[] = {result, LD->getChain()};
+ SDValue Ops[] = { result, LD->getChain() };
return DAG.getMergeValues(Ops, 2, dl);
}
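In plain C++ terms, the transformation described above the function is just
"load a byte, keep the low bit". A hedged illustration (loadBit is a
hypothetical helper, not backend code):

    #include <cstdint>

    // Mirrors the i1-load lowering: PTX has no 1-bit load, so read a
    // whole byte and truncate to the single bit the IR asked for.
    bool loadBit(const uint8_t *Addr) {
      uint8_t Wide = *Addr;    // v1 = ld i8* addr
      return (Wide & 1) != 0;  // v  = trunc v1 to i1
    }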
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValVT = Op.getOperand(1).getValueType();
+ if (ValVT == MVT::i1)
+ return LowerSTOREi1(Op, DAG);
+ else if (ValVT.isVector())
+ return LowerSTOREVector(Op, DAG);
+ else
+ return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue Val = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT ValVT = Val.getValueType();
+
+ if (ValVT.isVector()) {
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 stores of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ if (!ValVT.isSimple())
+ return SDValue();
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ unsigned Opcode = 0;
+ EVT EltVT = ValVT.getVectorElementType();
+ unsigned NumElts = ValVT.getVectorNumElements();
+
+ // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // stored type to i16 and propagate the "real" type as the memory type.
+ bool NeedExt = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExt = true;
+
+ switch (NumElts) {
+ default:
+ return SDValue();
+ case 2:
+ Opcode = NVPTXISD::StoreV2;
+ break;
+ case 4: {
+ Opcode = NVPTXISD::StoreV4;
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> Ops;
+
+ // First is the chain
+ Ops.push_back(N->getOperand(0));
+
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i));
+ if (NeedExt)
+ // ANY_EXTEND is correct here since the store will only look at the
+ // lower-order bits anyway.
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
+
+ // Then any remaining arguments
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+
+ SDValue NewSt = DAG.getMemIntrinsicNode(
+ Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ //return DCI.CombineTo(N, NewSt, true);
+ return NewSt;
+ }
+
+ return SDValue();
+}
+
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
-SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -906,18 +994,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i8, Tmp3);
- SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
return Result;
}
-
-SDValue
-NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
- EVT v) const {
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+ int idx, EVT v) const {
std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
std::stringstream suffix;
suffix << idx;
@@ -930,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return getExtSymb(DAG, ".PARAM", idx, v);
}
-SDValue
-NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
return getExtSymb(DAG, ".HLPPARAM", idx);
}
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
- static const char *const specialTypes[] = {
- "struct._image2d_t",
- "struct._image3d_t",
- "struct._sampler_t"
- };
+ static const char *const specialTypes[] = { "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t" };
const Type *Ty = arg->getType();
const PointerType *PTy = dyn_cast<PointerType>(Ty);
@@ -954,7 +1035,7 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
return false;
const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
- const std::string TypeName = STy ? STy->getName() : "";
+ const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
if (TypeName == specialTypes[i])
@@ -963,17 +1044,15 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
return false;
}
-SDValue
-NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout *TD = getDataLayout();
const Function *F = MF.getFunction();
- const AttrListPtr &PAL = F->getAttributes();
+ const AttributeSet &PAL = F->getAttributes();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -984,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
+ I != E; ++I) {
theArgs.push_back(I);
argTypes.push_back(I->getType());
}
- assert(argTypes.size() == Ins.size() &&
- "Ins types and function types did not match");
+ //assert(argTypes.size() == Ins.size() &&
+ // "Ins types and function types did not match");
int idx = 0;
- for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+ for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
Type *Ty = argTypes[i];
EVT ObjectVT = getValueType(Ty);
- assert(ObjectVT == Ins[i].VT &&
- "Ins type did not match function type");
+ //assert(ObjectVT == Ins[i].VT &&
+ // "Ins type did not match function type");
// If the kernel argument is image*_t or sampler_t, convert it to
// an i32 constant holding the parameter position. This can later be
// matched in the AsmPrinter to output the correct mangled name.
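// (For example, the first such argument is lowered to the constant i32 1.)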
- if (isImageOrSamplerVal(theArgs[i],
- (theArgs[i]->getParent() ?
- theArgs[i]->getParent()->getParent() : 0))) {
+ if (isImageOrSamplerVal(
+ theArgs[i],
+ (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+ : 0))) {
assert(isKernel && "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+ InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
continue;
}
if (theArgs[i]->use_empty()) {
// argument is dead
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ if (ObjectVT.isVector()) {
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+ }
+ } else {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ }
continue;
}
@@ -1019,29 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
// to newly created nodes. The SDNodes for params have to appear in the
// same order as they appear in the original function. "idx+1" holds that
// order.
- if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (ObjectVT.isVector()) {
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned Offset = 0;
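+        // Each element is loaded separately from the .param symbol; e.g. a
+        // v2f32 argument becomes two f32 loads at byte offsets 0 and 4.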
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue B = DAG.getIntPtrConstant(Offset);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ //getParamSymbol(DAG, idx, EltVT),
+ //DAG.getConstant(Offset, getPointerTy()));
+ A, B);
+ Value *SrcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+ SDValue Ld = DAG.getLoad(
+ EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+ false,
+ TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ Offset += EltVT.getStoreSizeInBits() / 8;
+ InVals.push_back(Ld);
+ }
+ continue;
+ }
+
// A plain scalar.
if (isABI || isKernel) {
// If ABI, load from the param symbol
SDValue Arg = getParamSymbol(DAG, idx);
- Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
- F->getContext()),
- llvm::ADDRESS_SPACE_PARAM));
- SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
- F->getContext())));
+ // Conjure up a value that we can get the address space from.
+ // FIXME: Using a constant here is a hack.
+ Value *srcValue = Constant::getNullValue(
+ PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(
+ ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
- }
- else {
+ } else {
// If no ABI, just move the param symbol
SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
}
continue;
@@ -1058,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
if (isKernel)
InVals.push_back(p);
else {
- SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
- p);
+ SDValue p2 = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
InVals.push_back(p2);
}
} else {
// Have to move a set of param symbols to registers and
// store them locally and return the local pointer in InVals
const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
- assert(elemPtrType &&
- "Byval parameter should be a pointer type");
+ assert(elemPtrType && "Byval parameter should be a pointer type");
Type *elemType = elemPtrType->getElementType();
// Compute the constituent parts
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
unsigned totalsize = 0;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
totalsize += vtparts[j].getStoreSizeInBits();
- SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
- CreateStackObject(totalsize/8, 16, false),
- getPointerTy());
+ SDValue localcopy = DAG.getFrameIndex(
+ MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+ getPointerTy());
unsigned sizesofar = 0;
std::vector<SDValue> theChains;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned numElems = 1;
- if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
- for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+ if (vtparts[j].isVector())
+ numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k = 0, ke = numElems; k != ke; ++k) {
EVT tmpvt = vtparts[j];
- if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+ if (tmpvt.isVector())
+ tmpvt = tmpvt.getVectorElementType();
SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
getParamSymbol(DAG, idx, tmpvt));
- SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
- DAG.getConstant(sizesofar, getPointerTy()));
- theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
- MachinePointerInfo(), false, false, 0));
- sizesofar += tmpvt.getStoreSizeInBits()/8;
+ SDValue addr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(
+ Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1118,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
//}
if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size()));
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+ OutChains.size()));
return Chain;
}
-SDValue
-NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
unsigned sizesofar = 0;
unsigned idx = 0;
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT theValType = theVal.getValueType();
unsigned numElems = 1;
- if (theValType.isVector()) numElems = theValType.getVectorNumElements();
- for (unsigned j=0,je=numElems; j!=je; ++j) {
+ if (theValType.isVector())
+ numElems = theValType.getVectorNumElements();
+ for (unsigned j = 0, je = numElems; j != je; ++j) {
SDValue tmpval = theVal;
if (theValType.isVector())
tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- theValType.getVectorElementType(),
- tmpval, DAG.getIntPtrConstant(j));
- Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
- dl, MVT::Other,
- Chain,
- DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ theValType.getVectorElementType(), tmpval,
+ DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(
+ isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+ MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
tmpval);
if (theValType.isVector())
- sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
else
- sizesofar += theValType.getStoreSizeInBits()/8;
+ sizesofar += theValType.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1162,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
-void
-NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const
-{
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
if (Constraint.length() > 1)
return;
else
@@ -1177,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// NVPTX supports vectors of legal types of any length in intrinsics because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
-bool
-NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
if (isTypeLegal(VT))
return true;
if (VT.isVector()) {
@@ -1189,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
-
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
-bool
-NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
- unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+ IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
@@ -1253,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool
-NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
@@ -1273,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
switch (AM.Scale) {
- case 0: // "r", "r+i" or "i" is allowed
+ case 0: // "r", "r+i" or "i" is allowed
break;
case 1:
- if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
return false;
// Otherwise we have r+i.
break;
@@ -1313,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-
-std::pair<unsigned, const TargetRegisterClass*>
+std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
@@ -1337,9 +1441,253 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-
-
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
+
+/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT ResVT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ assert(ResVT.isVector() && "Vector load must have vector type");
+
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 loads of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ assert(ResVT.isSimple() && "Can only handle simple types");
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default:
+ return;
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ EVT EltVT = ResVT.getVectorElementType();
+ unsigned NumElts = ResVT.getVectorNumElements();
+
+ // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+  // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ Opcode = NVPTXISD::LoadV2;
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // The select routine does not have access to the LoadSDNode instance, so
+ // pass along the extension information
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), LD->getMemoryVT(),
+ LD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
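+  // At this point a v4i8 load, for example, has become a LoadV4 yielding
+  // four i16 values (with memory VT v4i8), each truncated back to i8 above.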
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+}
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Intrin = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Get the intrinsic ID
+ unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p: {
+ EVT ResVT = N->getValueType(0);
+
+ if (ResVT.isVector()) {
+ // Vector LDG/LDU
+
+ unsigned NumElts = ResVT.getVectorNumElements();
+ EVT EltVT = ResVT.getVectorElementType();
+
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+      // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV2;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV2;
+ break;
+ }
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV4;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV4;
+ break;
+ }
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+
+ OtherOps.push_back(Chain); // Chain
+ // Skip operand 1 (intrinsic ID)
+ // Others
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(
+ Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res =
+ DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+ } else {
+ // i8 LDG/LDU
+ assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+ "Custom handling of non-i8 ldu/ldg?");
+
+ // Just copy all operands as-is
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
+ // Force output to i16
+ SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ // We make sure the memory type is i8, which will be used during isel
+ // to select the proper instruction.
+ SDValue NewLD =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8, MemSD->getMemOperand());
+
+ Results.push_back(NewLD.getValue(0));
+ Results.push_back(NewLD.getValue(1));
+ }
+ }
+ }
+}
+
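+// Custom result legalization: loads of illegal vector types and the ldg/ldu
+// intrinsics are rebuilt by the helpers above.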
+void NVPTXTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ report_fatal_error("Unhandled custom legalization");
+ case ISD::LOAD:
+ ReplaceLoadVector(N, DAG, Results);
+ return;
+ case ISD::INTRINSIC_W_CHAIN:
+ ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+ return;
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 94a177ceb00a..3cd49d38af76 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -58,7 +58,16 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
- Dummy
+ Dummy,
+
+ LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LoadV4,
+ LDGV2, // LDG.v2
+ LDGV4, // LDG.v4
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4
};
}
@@ -78,7 +87,7 @@ public:
bool isTypeSupportedInIntrinsic(MVT VT) const;
- bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
@@ -92,18 +101,19 @@ public:
virtual unsigned getFunctionAlignment(const Function *F) const;
virtual EVT getSetCCResultType(EVT VT) const {
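+    // Vector compares now yield a vector of i1, e.g. a v4f32 setcc gives v4i1.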
+ if (VT.isVector())
+ return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
ConstraintType getConstraintType(const std::string &Constraint) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- virtual SDValue
- LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
@@ -125,22 +135,29 @@ public:
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getShiftAmountTy(EVT LHSTy) const {
- return MVT::i32;
- }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
- MVT::i32) const;
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+ EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cd50deb26a23..33a63c26f4e2 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -16,108 +16,62 @@
#include "NVPTXTargetMachine.h"
#define GET_INSTRINFO_CTOR
#include "NVPTXGenInstrInfo.inc"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cstdio>
-
using namespace llvm;
// FIXME: Add the subtarget support on this constructor.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
-: NVPTXGenInstrInfo(),
- TM(tm),
- RegInfo(*this, *TM.getSubtargetImpl()) {}
-
+ : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
-void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+void NVPTXInstrInfo::copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
NVPTX::Int32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
- NVPTX::Int8RegsRegClass.contains(SrcReg))
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
- NVPTX::Int1RegsRegClass.contains(SrcReg))
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
- NVPTX::Float32RegsRegClass.contains(SrcReg))
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
- NVPTX::Int16RegsRegClass.contains(SrcReg))
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
- NVPTX::Int64RegsRegClass.contains(SrcReg))
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
- NVPTX::Float64RegsRegClass.contains(SrcReg))
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
- NVPTX::V4F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
- NVPTX::V4I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
- NVPTX::V2F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
- NVPTX::V2I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
- NVPTX::V4I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
- NVPTX::V2I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
- NVPTX::V4I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
- NVPTX::V2I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
- NVPTX::V2I64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
- NVPTX::V2F64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}
}
-bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const {
// Look for the appropriate part of TSFlags
bool isMove = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
- NVPTX::SimpleMoveShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
isMove = (TSFlags == 1);
if (isMove) {
@@ -134,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
return false;
}
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
-{
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- default: return false;
+ default:
+ return false;
case NVPTX::INT_PTX_SREG_NTID_X:
case NVPTX::INT_PTX_SREG_NTID_Y:
case NVPTX::INT_PTX_SREG_NTID_Z:
@@ -155,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
}
}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
- NVPTX::isLoadShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
isLoad = (TSFlags == 1);
if (isLoad)
AddrSpace = getLdStCodeAddrSpace(MI);
@@ -170,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isStore = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
- NVPTX::isStoreShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
isStore = (TSFlags == 1);
if (isStore)
AddrSpace = getLdStCodeAddrSpace(MI);
return isStore;
}
-
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
unsigned addrspace = 0;
if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
@@ -192,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
return true;
}
-
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -216,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
-bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool NVPTXInstrInfo::AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -248,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineInstr *SecondLastInst = I;
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
return true;
// If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
LastInst->getOpcode() == NVPTX::GOTO) {
- TBB = SecondLastInst->getOperand(1).getMBB();
+ TBB = SecondLastInst->getOperand(1).getMBB();
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
@@ -278,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
+ if (I == MBB.begin())
+ return 0;
--I;
if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
return 0;
@@ -288,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin()) return 1;
+ if (I == MBB.begin())
+ return 1;
--I;
if (I->getOpcode() != NVPTX::CBranch)
return 1;
@@ -298,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned
-NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
+unsigned NVPTXInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -310,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// One-way branch.
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch
+ if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
- else // Conditional branch
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+ .addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7b8e218b05b6..b1972e9b7254 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -23,8 +23,7 @@
namespace llvm {
-class NVPTXInstrInfo : public NVPTXGenInstrInfo
-{
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
public:
@@ -50,30 +49,26 @@ public:
* const TargetRegisterClass *RC) const;
*/
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const ;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+ virtual void copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isReadSpecialReg(MachineInstr &MI) const;
- virtual bool CanTailMerge(const MachineInstr *MI) const ;
+ virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ virtual bool AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ virtual unsigned InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
- return MI.getOperand(2).getImm();
+ return MI.getOperand(2).getImm();
}
};
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8a410b872925..f43abe283b58 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr];"), []>;
+ def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr];"), []>;
def _ari : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _ari_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr+$offset];"), []>;
def _asi : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
-let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
-defm LD_v2i8 : LD<V2I8Regs>;
-defm LD_v4i8 : LD<V4I8Regs>;
-defm LD_v2i16 : LD<V2I16Regs>;
-defm LD_v4i16 : LD<V4I16Regs>;
-defm LD_v2i32 : LD<V2I32Regs>;
-defm LD_v4i32 : LD<V4I32Regs>;
-defm LD_v2f32 : LD<V2F32Regs>;
-defm LD_v4f32 : LD<V4F32Regs>;
-defm LD_v2i64 : LD<V2I64Regs>;
-defm LD_v2f64 : LD<V2F64Regs>;
-}
-
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr], $src;"), []>;
+ def _areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr], $src;"), []>;
def _ari : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr+$offset], $src;"), []>;
+ def _ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr+$offset], $src;"), []>;
def _asi : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
-let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
-defm ST_v2i8 : ST<V2I8Regs>;
-defm ST_v4i8 : ST<V4I8Regs>;
-defm ST_v2i16 : ST<V2I16Regs>;
-defm ST_v4i16 : ST<V4I16Regs>;
-defm ST_v2i32 : ST<V2I32Regs>;
-defm ST_v4i32 : ST<V4I32Regs>;
-defm ST_v2f32 : ST<V2F32Regs>;
-defm ST_v4f32 : ST<V4F32Regs>;
-defm ST_v2i64 : ST<V2I64Regs>;
-defm ST_v2f64 : ST<V2F64Regs>;
-}
-
// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
def _v4_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
-include "NVPTXVector.td"
-
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 028a94bfd1bb..49e2568dfa2c 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
-// Vector ldu
-multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
- NVPTXInst eleInst, NVPTXInst eleInst64> {
- def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
- Requires<[hasLDU]>;
- def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
- Requires<[hasLDU]>;
+
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
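+// The i8 intrinsic result is forced to i16 during legalization (see
+// ReplaceINTRINSIC_W_CHAIN), so it is matched through this memory-VT PatFrag
+// into Int16Regs rather than through the generic LDG_G multiclass.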
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
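+// Each multiclass entry covers one addressing mode: 32- and 64-bit address
+// registers, a direct global address, and reg+imm forms of both widths.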
+
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+defm INT_PTX_LDG_GLOBAL_i8
+ : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
-let VecInstType=isVecLD.Value in {
-defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
- V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
- INT_PTX_LDU_G_v2i8_ELE_64>;
-defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
- V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
- INT_PTX_LDU_G_v4i8_ELE_64>;
-defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
- V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
- INT_PTX_LDU_G_v2i16_ELE_64>;
-defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
- V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
- INT_PTX_LDU_G_v4i16_ELE_64>;
-defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
- V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
- INT_PTX_LDU_G_v2i32_ELE_64>;
-defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
- V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
- INT_PTX_LDU_G_v4i32_ELE_64>;
-defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
- V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
- INT_PTX_LDU_G_v2f32_ELE_64>;
-defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
- V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
- INT_PTX_LDU_G_v4f32_ELE_64>;
-defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
- V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
- INT_PTX_LDU_G_v2i64_ELE_64>;
-defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
- V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
- INT_PTX_LDU_G_v2f64_ELE_64>;
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+ : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+ : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+ : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+ : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+ : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+ : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+ : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+ : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+ : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+ : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 9273931e9919..7c257b4c6a89 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,31 +12,28 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
-namespace llvm {
-FunctionPass *createLowerAggrCopies();
-}
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
- Value *dstAddr, Value *len,
- //unsigned numLoads,
- bool srcVolatile, bool dstVolatile,
- LLVMContext &Context, Function &F) {
+static void convertTransferToLoop(
+ Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+ //unsigned numLoads,
+ bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
BasicBlock *origBB = splitAt->getParent();
@@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS =
- dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
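The loop body built by convertTransferToLoop falls outside this hunk; for orientation, here is a minimal IRBuilder sketch of such a byte-copy loop using the names visible above. The helper itself, and the loopBB/postBB blocks created around splitAt, are assumptions, not the patch's exact code:

    // Hypothetical helper mirroring the shape of the emitted copy loop:
    //   for (ind = 0; ind != len; ++ind) dstAddr[ind] = srcAddr[ind];
    static void emitByteCopyLoop(BasicBlock *origBB, BasicBlock *loopBB,
                                 BasicBlock *postBB, Value *srcAddr,
                                 Value *dstAddr, Value *len,
                                 bool srcVolatile, bool dstVolatile) {
      IRBuilder<> loop(loopBB);
      Type *indType = len->getType();
      PHINode *ind = loop.CreatePHI(indType, 2);
      ind->addIncoming(ConstantInt::get(indType, 0), origBB);
      // Load one byte from the source and store it to the destination,
      // preserving the original transfer's volatility flags.
      Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
      loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
      // Advance the induction variable and loop until len bytes are copied.
      Value *newInd = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
      ind->addIncoming(newInd, loopBB);
      loop.CreateCondBr(loop.CreateICmpNE(newInd, len), loopBB, postBB);
    }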
@@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
- dstAddr = builder.CreateBitCast(dstAddr,
- PointerType::get(val->getType(), dstAS));
+ dstAddr =
+ builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
IRBuilder<> loop(loopBB);
PHINode *ind = loop.CreatePHI(len->getType(), 0);
@@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
//BasicBlock *bb = BI;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+ ++II) {
+ if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false) continue;
+ if (load->hasOneUse() == false)
+ continue;
- if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+ if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ continue;
User *use = *(load->use_begin());
- if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+ if (StoreInst *store = dyn_cast<StoreInst>(use)) {
if (store->getOperand(0) != load) //getValueOperand
- continue;
+ continue;
aggrLoads.push_back(load);
}
- } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+ } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
Value *len = intr->getLength();
// If the number of elements being copied is greater
// than MaxAggrCopySize, lower it to a loop
- if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemcpys.push_back(intr);
}
@@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// turn variable length memcpy/memmove into a loop
aggrMemcpys.push_back(intr);
}
- } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+ } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
Value *len = memsetintr->getLength();
- if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemsets.push_back(memsetintr);
}
@@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
- && (aggrMemsets.size() == 0)) return false;
+ if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+ (aggrMemsets.size() == 0))
+ return false;
//
// Do the transformation of an aggr load/copy/set to a loop
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index b150c69815dd..286e753fa92b 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,9 +15,9 @@
#ifndef NVPTX_LOWER_AGGR_COPIES_H
#define NVPTX_LOWER_AGGR_COPIES_H
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h
index b4a4dbce98a9..a95c16b1e67e 100644
--- a/lib/Target/NVPTX/NVPTXNumRegisters.h
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -11,10 +11,6 @@
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = 396;
-
-}
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index e3cd46f063bf..282465359b07 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -13,260 +13,88 @@
#define DEBUG_TYPE "nvptx-reg-info"
-#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
-
using namespace llvm;
-namespace llvm
-{
-std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return ".f32";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return ".s64";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return ".s32";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return ".s16";
}
- // Int8Regs become 16-bit registers in PTX
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ // Int8Regs become 16-bit registers in PTX
+ else if (RC == &NVPTX::Int8RegsRegClass) {
return ".s16";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return ".v2.f32";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return ".v4.f32";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return ".v2.s32";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return ".v4.s32";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return ".v2.f64";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return ".v2.s64";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return ".v4.s16";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return ".v4.s16";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
}
-std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return "%f";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return "%fd";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return "%rd";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return "%r";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return "%rs";
- }
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ } else if (RC == &NVPTX::Int8RegsRegClass) {
return "%rc";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return "%p";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return "%v2f";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return "%v4f";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return "%v2r";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return "%v4r";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return "%v2fd";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return "%v2rd";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return "%v2s";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return "%v4rs";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return "%v2rc";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return "%v4rc";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
}
-
-bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return true;
- return false;
-}
-
-std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return (&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return (&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-int getNVPTXVectorSize(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return 4;
- llvm_unreachable("Not a vector register class");
-}
}
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0),
- Is64Bit(st.is64Bit()) {}
+ : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
-const uint16_t* NVPTXRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
// NVPTX Callee Saved Reg Classes
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
return CalleeSavedRegClasses;
}
@@ -275,34 +103,24 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void NVPTXRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
- RegScavenger *RS) const {
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-
-int NVPTXRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return 0;
}
@@ -310,16 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
-unsigned NVPTXRegisterInfo::getRARegister() const {
- return 0;
-}
-
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void NVPTXRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN,
- // ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 595178335ae2..d40682066142 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -17,7 +17,6 @@
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,34 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
bool Is64Bit;
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
- ManagedStringPool ManagedStrPool;
+ ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const TargetInstrInfo &tii,
- const NVPTXSubtarget &st);
-
+ NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
//------------------------------------------------------
// NVPTX callee saved registers
- virtual const uint16_t*
+ virtual const uint16_t *
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
// NVPTX callee saved register classes
- virtual const TargetRegisterClass* const *
+ virtual const TargetRegisterClass *const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj,
- RegScavenger *RS=NULL) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -78,15 +71,9 @@ public:
};
-
-std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
-std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
-bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
-std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
-int getNVPTXVectorSize (const TargetRegisterClass *RC);
-const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
} // end namespace llvm
-
#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ba158258b994..8d100d631683 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -37,9 +37,6 @@ foreach i = 0-395 in {
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
- // Vectors
- foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
- def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-def V2F32Regs
- : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Float32Regs, 2, ".v2.f32">;
-def V4F32Regs
- : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Float32Regs, 4, ".v4.f32">;
-def V2I32Regs
- : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Int32Regs, 2, ".v2.u32">;
-def V4I32Regs
- : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Int32Regs, 4, ".v4.u32">;
-def V2F64Regs
- : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Float64Regs, 2, ".v2.f64">;
-def V2I64Regs
- : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Int64Regs, 2, ".v2.u64">;
-def V2I16Regs
- : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
- Int16Regs, 2, ".v2.u16">;
-def V4I16Regs
- : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
- Int16Regs, 4, ".v4.u16">;
-def V2I8Regs
- : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
- Int8Regs, 2, ".v2.u8">;
-def V4I8Regs
- : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
- Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1ca466266f6..e166be5a68e4 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -14,8 +14,8 @@
#ifndef LLVM_NVPTXSECTION_H
#define LLVM_NVPTXSECTION_H
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/GlobalVariable.h"
#include <vector>
namespace llvm {
@@ -38,6 +38,8 @@ public:
virtual bool isBaseAddressKnownZero() const { return true; }
virtual bool UseCodeAlign() const { return false; }
virtual bool isVirtualSection() const { return false; }
+ virtual std::string getLabelBeginName() const { return ""; }
+ virtual std::string getLabelEndName() const { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 2836cad4f021..83dfe120899a 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -11,19 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Support/InstIterator.h"
-#include "NVPTXUtilities.h"
#include "NVPTXSplitBBatBar.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/InstIterator.h"
using namespace llvm;
-namespace llvm {
-FunctionPass *createSplitBBatBarPass();
-}
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
char NVPTXSplitBBatBar::ID = 0;
@@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
// This interface will most likely not be necessary, because this pass will
// not be invoked by the driver, but will be used as a prerequisite to
// another pass.
-FunctionPass *llvm::createSplitBBatBarPass() {
- return new NVPTXSplitBBatBar();
-}
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
index 9e4d5a066d4c..bdafba9075a0 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.h
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -15,8 +15,8 @@
#ifndef NVPTX_SPLIT_BB_AT_BAR_H
#define NVPTX_SPLIT_BB_AT_BAR_H
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7b62cce2c65c..2dcd73dcff9c 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -22,27 +22,23 @@ using namespace llvm;
// Select Driver Interface
#include "llvm/Support/CommandLine.h"
namespace {
-cl::opt<NVPTX::DrvInterface>
-DriverInterface(cl::desc("Choose driver interface:"),
- cl::values(
- clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
- clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
- clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
- clEnumValEnd),
- cl::init(NVPTX::NVCL));
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+ cl::desc("Choose driver interface:"),
+ cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+ clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+ clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+ cl::init(NVPTX::NVCL));
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
-: NVPTXGenSubtargetInfo(TT, CPU, FS),
- Is64Bit(is64Bit),
- PTXVersion(0),
- SmVersion(10) {
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+ SmVersion(20) {
drvInterface = DriverInterface;
// Provide the default CPU if none is specified
- std::string defCPU = "sm_10";
+ std::string defCPU = "sm_20";
ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 3cfd9718e541..670077daaa69 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -14,8 +14,8 @@
#ifndef NVPTXSUBTARGET_H
#define NVPTXSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "NVPTX.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"
@@ -25,7 +25,7 @@
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-
+
std::string TargetName;
NVPTX::DrvInterface drvInterface;
bool Is64Bit;
@@ -57,16 +57,14 @@ public:
bool hasF32FTZ() const { return SmVersion >= 20; }
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDG() const { return SmVersion >= 32; }
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }
- inline bool hasSWROT32() const {
- return true;
- }
- inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+ inline bool hasSWROT32() const { return true; }
+ inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
bool is64Bit() const { return Is64Bit; }
unsigned int getSmVersion() const { return SmVersion; }
@@ -95,4 +93,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cbb490003d37..67ca6b58e5a6 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -12,27 +12,30 @@
//===----------------------------------------------------------------------===//
#include "NVPTXTargetMachine.h"
-#include "NVPTX.h"
-#include "NVPTXSplitBBatBar.h"
-#include "NVPTXLowerAggrCopies.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
-#include "llvm/PassManager.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXSplitBBatBar.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -41,14 +44,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
-
using namespace llvm;
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
extern "C" void LLVMInitializeNVPTXTarget() {
// Register the target.
@@ -58,53 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() {
RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+ // FIXME: This pass is really intended to be invoked during IR optimization,
+ // but it's very NVPTX-specific.
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
- const TargetOptions& Options,
- Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
-: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit),
- DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
- STTI(&TLInfo), VTTI(&TLInfo)
-/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
-}
-
-
+NVPTXTargetMachine::NVPTXTargetMachine(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(
+ *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
void NVPTXTargetMachine32::anchor() {}
-NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
-NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace llvm {
class NVPTXPassConfig : public TargetPassConfig {
public:
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
NVPTXTargetMachine &getNVPTXTargetMachine() const {
return getTM<NVPTXTargetMachine>();
@@ -125,10 +115,7 @@ bool NVPTXPassConfig::addInstSelector() {
addPass(createSplitBBatBarPass());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
- addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() {
- return false;
-}
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 11bc9d4fa698..5fbcf735b48f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,77 +11,64 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
-#include "NVPTXInstrInfo.h"
+#include "ManagedStringPool.h"
+#include "NVPTXFrameLowering.h"
#include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
-#include "NVPTXFrameLowering.h"
-#include "ManagedStringPool.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
- NVPTXSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- NVPTXInstrInfo InstrInfo;
- NVPTXTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
+ NVPTXSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
// NVPTX does not have any call stack frame, but need a NVPTX specific
// FrameLowering class because TargetFrameLowering is abstract.
- NVPTXFrameLowering FrameLowering;
+ NVPTXFrameLowering FrameLowering;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
- ManagedStringPool ManagedStrPool;
-
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
+ ManagedStringPool ManagedStrPool;
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
public:
- NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OP,
- bool is64bit);
+ NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL;}
- virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+ virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const NVPTXRegisterInfo *getRegisterInfo() const {
return &(InstrInfo.getRegisterInfo());
}
virtual NVPTXTargetLowering *getTargetLowering() const {
- return const_cast<NVPTXTargetLowering*>(&TLInfo);
+ return const_cast<NVPTXTargetLowering *>(&TLInfo);
}
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
@@ -89,22 +76,19 @@ public:
//virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
ManagedStringPool *getManagedStrPool() const {
- return const_cast<ManagedStringPool*>(&ManagedStrPool);
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
// Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &,
- JITCodeEmitter &,
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
bool = true) {
return true;
}
// Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &,
- MCContext *&,
- raw_ostream &,
+ virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) {
return true;
}
@@ -129,7 +113,6 @@ public:
CodeGenOpt::Level OL);
};
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index b5698a2fc08f..6ab0e08ad091 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -46,45 +46,43 @@ public:
}
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
- TextSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getText());
- DataSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getDataRel());
- BSSSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getBSS());
- ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getReadOnly());
+ TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+ DataSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+ ReadOnlySection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
- StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- LSDASection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
+ StaticCtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ StaticDtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ LSDASection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ EHFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfAbbrevSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfStrSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLocSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfARangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfRangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@@ -93,8 +91,7 @@ public:
virtual const MCSection *
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang,
- const TargetMachine &TM) const {
+ Mangler *Mang, const TargetMachine &TM) const {
return DataSection;
}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 3f52251cc1b2..6786eb02240c 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -12,11 +12,11 @@
#include "NVPTXUtilities.h"
#include "NVPTX.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Constants.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cstring>
#include <map>
@@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
ManagedStatic<per_module_annot_t> annotationCache;
-
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+ ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::isTexture(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a texture symbol");
return true;
}
@@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
bool llvm::isSurface(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a surface symbol");
return true;
}
@@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
bool llvm::isSampler(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a sampler symbol");
return true;
}
@@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findAllNVVMAnnotation(
+ func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
}
bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
}
bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
}
bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
}
bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
}
bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
}
bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
}
bool llvm::getMinCTASm(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
}
bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
- bool retval = llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
- x);
+ bool retval = llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
if (retval == false) {
// There is no NVVM metadata, check the calling convention
if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
else
return false;
}
- return (x==1);
+ return (x == 1);
}
bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
- bool retval = llvm::findAllNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
- Vs);
+ bool retval = llvm::findAllNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
if (retval == false)
return false;
- for (int i=0, e=Vs.size(); i<e; i++) {
+ for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
- if ( (v >> 16) == index ) {
- align = v & 0xFFFF;
+ if ((v >> 16) == index) {
+ align = v & 0xFFFF;
return true;
}
}
@@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
- for (int i=0, n = alignNode->getNumOperands();
- i<n; i++) {
+ for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
- if ( (v>>16) == index ) {
+ if ((v >> 16) == index) {
align = v & 0xFFFF;
return true;
}
- if ( (v>>16) > index ) {
+ if ((v >> 16) > index) {
return false;
}
}
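Both getAlign overloads above decode the same packed annotation word: the parameter index lives in the high 16 bits and the alignment in the low 16, as the (v >> 16) and (v & 0xFFFF) tests show. A standalone sketch of that scheme, with hypothetical helper names:

    // v = (index << 16) | align, matching the decoding in getAlign().
    static unsigned packAlign(unsigned index, unsigned align) {
      return (index << 16) | (align & 0xFFFF);
    }
    static bool unpackAlign(unsigned v, unsigned index, unsigned &align) {
      if ((v >> 16) != index)
        return false;       // word describes a different parameter
      align = v & 0xFFFF;   // low half holds the alignment in bytes
      return true;
    }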
@@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
// consider several special intrinsics in stripping pointer casts, and
// provide an option to ignore GEP indices to find out the base address only,
// which could be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- bool ignore_GEP_indices) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
V = V->stripPointerCasts();
while (true) {
if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
// - ignore GEP indices to find out the base address only, and
// - track PHINodes,
// which could be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
if (processed.find(V) != processed.end())
return NULL;
processed.insert(V);
@@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
return V;
}
-
// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index fe6ad559e9df..a208004297d0 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -14,17 +14,16 @@
#ifndef NVPTXUTILITIES_H
#define NVPTXUTILITIES_H
-#include "llvm/Value.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
#include <cstdarg>
#include <set>
#include <string>
#include <vector>
-namespace llvm
-{
+namespace llvm {
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
/// to pass into type construction of CallInst ctors. This turns a null
/// terminated list of pointers (or other value types) into a real live vector.
///
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
va_list Args;
va_start(Args, A);
std::vector<T> Result;
@@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {
bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
index 6a0e5328f62f..5f074b33a2d4 100644
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
namespace llvm {
-bool isParamLoad(const MachineInstr *MI)
-{
+bool isParamLoad(const MachineInstr *MI) {
if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
(MI->getOpcode() != NVPTX::LD_i64_avar))
return false;
@@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
return true;
}
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
-static int encode_leb128(uint64_t val, int *nbytes,
- char *space, int splen)
-{
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
char *a;
char *end = space + splen;
@@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
#undef DIGIT_WIDTH
#undef MORE_BYTES
-uint64_t encode_leb128(const char *str)
-{
- union { uint64_t x; char a[8]; } temp64;
+uint64_t encode_leb128(const char *str) {
+ union {
+ uint64_t x;
+ char a[8];
+ } temp64;
temp64.x = 0;
- for (unsigned i=0,e=strlen(str); i!=e; ++i)
- temp64.a[i] = str[e-1-i];
+ for (unsigned i = 0, e = strlen(str); i != e; ++i)
+ temp64.a[i] = str[e - 1 - i];
char encoded[16];
int nbytes;
int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
- (void)retval;
- assert(retval == 0 &&
- "Encoding to leb128 failed");
+ (void) retval;
+ assert(retval == 0 && "Encoding to leb128 failed");
assert(nbytes <= 8 &&
"Cannot support register names with leb128 encoding > 8 bytes");
temp64.x = 0;
- for (int i=0; i<nbytes; ++i)
+ for (int i = 0; i < nbytes; ++i)
temp64.a[i] = encoded[i];
return temp64.x;
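The macros above (DATA_MASK 0x7f, DIGIT_WIDTH 7, MORE_BYTES 0x80) describe the standard little-endian base-128 encoding; the following is a self-contained reference encoder with a classic worked example, offered as a sketch of the scheme rather than the file's exact loop:

    #include <stdint.h>

    // Standard ULEB128: emit 7 data bits per byte, setting 0x80 (MORE_BYTES)
    // on every byte except the last. Worked example: 624485 (0x98765)
    // encodes to the bytes 0xE5 0x8E 0x26.
    static int uleb128Encode(uint64_t val, unsigned char *out) {
      int n = 0;
      do {
        unsigned char byte = val & 0x7f; // DATA_MASK: low 7 bits
        val >>= 7;                       // DIGIT_WIDTH: consume one digit
        if (val != 0)
          byte |= 0x80;                  // MORE_BYTES: continuation flag
        out[n++] = byte;
      } while (val != 0);
      return n;                          // bytes written
    }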
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 000000000000..0ad62ce39b0d
--- /dev/null
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,177 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurrences of __nvvm_reflect("string") with an integer
+// based on the -nvvm-reflect-list string=<int> option given to this pass.
+// If an undefined string value is seen in a call to __nvvm_reflect("string"),
+// a default value of 0 will be used.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+ StringMap<int> VarMap;
+  typedef StringMap<int>::iterator VarMapIter;
+ Function *ReflectFunction;
+
+public:
+ static char ID;
+ NVVMReflect() : ModulePass(ID) {
+ VarMap.clear();
+ ReflectFunction = 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+ virtual bool runOnModule(Module &);
+
+ void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+ cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+                "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
+ false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+ cl::desc("A list of string=num assignments"),
+ cl::ValueRequired);
+
+/// The command line can look as follows:
+///   -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", and "e=2" end up in the
+/// ReflectList vector. First, each ReflectList[i] is split using ","
+/// as the delimiter. Then each of the resulting parts is split using
+/// "=" as the delimiter.
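+/// For example, the hypothetical invocation (illustration only)
+///   -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0
+/// would leave VarMap as { "a" -> 1, "b" -> 2, "c" -> 3, "d" -> 0 }.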
+void NVVMReflect::setVarMap() {
+ for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+ DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
+ SmallVector<StringRef, 4> NameValList;
+ StringRef(ReflectList[i]).split(NameValList, ",");
+ for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
+ SmallVector<StringRef, 2> NameValPair;
+ NameValList[j].split(NameValPair, "=");
+ assert(NameValPair.size() == 2 && "name=val expected");
+ std::stringstream ValStream(NameValPair[1]);
+ int Val;
+ ValStream >> Val;
+ assert((!(ValStream.fail())) && "integer value expected");
+ VarMap[NameValPair[0]] = Val;
+ }
+ }
+}
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+  // If the reflect function is not used, there will be
+  // no entry for it in the module.
+ if (ReflectFunction == 0)
+ return false;
+
+ // Validate _reflect function
+ assert(ReflectFunction->isDeclaration() &&
+ "_reflect function should not have a body");
+ assert(ReflectFunction->getReturnType()->isIntegerTy() &&
+ "_reflect's return type should be integer");
+
+ std::vector<Instruction *> ToRemove;
+
+  // Go through the uses of ReflectFunction in this module.
+  // Each of them should be a CallInst with a ConstantArray argument.
+ // First validate that. If the c-string corresponding to the
+ // ConstantArray can be found successfully, see if it can be
+ // found in VarMap. If so, replace the uses of CallInst with the
+ // value found in VarMap. If not, replace the use with value 0.
+ for (Value::use_iterator I = ReflectFunction->use_begin(),
+ E = ReflectFunction->use_end();
+ I != E; ++I) {
+ assert(isa<CallInst>(*I) && "Only a call instruction can use _reflect");
+ CallInst *Reflect = cast<CallInst>(*I);
+
+ assert((Reflect->getNumOperands() == 2) &&
+           "Only one operand expected for _reflect function");
+    // In CUDA, we will have an extra constant-to-generic conversion of
+ // the string.
+ const Value *conv = Reflect->getArgOperand(0);
+ assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+ const CallInst *ConvCall = cast<CallInst>(conv);
+ const Value *str = ConvCall->getArgOperand(0);
+ assert(isa<ConstantExpr>(str) &&
+ "Format of _reflect function not recognized");
+ const ConstantExpr *GEP = cast<ConstantExpr>(str);
+
+ const Value *Sym = GEP->getOperand(0);
+ assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
+
+ const Constant *SymStr = cast<Constant>(Sym);
+
+ assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ "Format of _reflect function not recognized");
+
+ assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ "Format of _reflect function not recognized");
+
+ std::string ReflectArg =
+ cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+
+ ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
+ DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
+
+ int ReflectVal = 0; // The default value is 0
+ if (VarMap.find(ReflectArg) != VarMap.end()) {
+ ReflectVal = VarMap[ReflectArg];
+ }
+ Reflect->replaceAllUsesWith(
+ ConstantInt::get(Reflect->getType(), ReflectVal));
+ ToRemove.push_back(Reflect);
+ }
+ if (ToRemove.size() == 0)
+ return false;
+
+ for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
+ ToRemove[i]->eraseFromParent();
+ return true;
+}
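For orientation, a minimal sketch of the source-level pattern this pass folds
away. The __nvvm_reflect name matches the function the pass looks for; the
"__CUDA_FTZ" string and the helper around it are illustrative assumptions,
not taken from this patch:

extern "C" int __nvvm_reflect(const char *);

// With -nvvm-reflect-list __CUDA_FTZ=1 the call below folds to the constant
// 1; with no matching list entry it folds to 0, and later passes can delete
// the dead branch.
float scale(float x) {
  if (__nvvm_reflect("__CUDA_FTZ"))
    return x * 0.5f;   // variant chosen when the string maps to non-zero
  return x * 0.25f;    // default variant
}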
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index f3624b9f23c7..cc7d4dc5ece7 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;
extern "C" void LLVMInitializeNVPTXTargetInfo() {
RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
- "NVIDIA PTX 32-bit");
+ "NVIDIA PTX 32-bit");
RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
- "NVIDIA PTX 64-bit");
+ "NVIDIA PTX 64-bit");
}
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
deleted file mode 100644
index 8043e2de0972..000000000000
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ /dev/null
@@ -1,1248 +0,0 @@
-//===-- VectorElementize.cpp - Convert vector ops to scalar ops ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts operations on vector types to operations on their
-// element types.
-//
-// For generic binary and unary vector instructions, the conversion is simple.
-// Suppose we have
-// av = bv Vop cv
-// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
-// This gets converted to the following:
-// a1 = b1 Sop c1
-// a2 = b2 Sop c2
-//
-// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
-// For the above example, the map will look as follows:
-// av => [a1, a2]
-// bv => [b1, b2]
-//
-// In addition, initVectorInfo creates the following opcode->opcode map.
-// Vop => Sop
-// OtherVop => OtherSop
-// ...
-//
-// For vector specific instructions like vecbuild, vecshuffle etc, the
-// conversion is different. Look at comments near the functions with
-// prefix createVec<...>.
-//
-//===----------------------------------------------------------------------===//
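// A plain-C++ sketch of the scalarization shape the header above describes
// (the vecAdd helper and std::array types are illustrative assumptions):
// each lane of av = bv Vop cv becomes one scalar a[i] = b[i] Sop c[i].
#include <array>

template <unsigned N>
std::array<int, N> vecAdd(const std::array<int, N> &B,
                          const std::array<int, N> &C) {
  std::array<int, N> A;
  for (unsigned I = 0; I != N; ++I)
    A[I] = B[I] + C[I];  // one scalar Sop per vector lane
  return A;
}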
-
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
-
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
- virtual bool runOnMachineFunction(MachineFunction &F);
-
- NVPTXTargetMachine &TM;
- MachineRegisterInfo *MRI;
- const NVPTXRegisterInfo *RegInfo;
- const NVPTXInstrInfo *InstrInfo;
-
- llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
- RegClassMap;
- llvm::DenseMap<unsigned, bool> SimpleMoveMap;
-
- llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
-
- bool isVectorInstr(MachineInstr *);
-
- SmallVector<unsigned, 4> getScalarRegisters(unsigned);
- unsigned getScalarVersion(unsigned);
- unsigned getScalarVersion(MachineInstr *);
-
- bool isVectorRegister(unsigned);
- const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
- unsigned numCopiesNeeded(MachineInstr *);
-
- void createLoadCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
- void createStoreCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createVecDest(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createCopies(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- unsigned copyProp(MachineFunction&);
- unsigned removeDeadMoves(MachineFunction&);
-
- void elementize(MachineFunction&);
-
- bool isSimpleMove(MachineInstr *);
-
- void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
-public:
-
- static char ID; // Pass identification, replacement for typeid
- VectorElementize(NVPTXTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) {}
-
- virtual const char *getPassName() const {
- return "Convert LLVM vector types to their element types";
- }
-};
-
-char VectorElementize::ID = 1;
-}
-
-static cl::opt<bool>
-RemoveRedundantMoves("nvptx-remove-redundant-moves",
- cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
- cl::init(true));
-
-#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
- >> NVPTX::VecInstTypeShift)
-#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
-#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
-#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
-#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
-#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
-#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
-#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
-#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
-
-bool VectorElementize::isSimpleMove(MachineInstr *mi) {
- if (mi->isCopy())
- return true;
- unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
- >> NVPTX::SimpleMoveShift;
- return (TSFlags == 1);
-}
-
-bool VectorElementize::isVectorInstr(MachineInstr *mi) {
- if ((mi->getOpcode() == NVPTX::PHI) ||
- (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
- MachineOperand dest = mi->getOperand(0);
- return isVectorRegister(dest.getReg());
- }
- return ISVECINST(mi);
-}
-
-unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
- return getScalarVersion(mi->getOpcode());
-}
-
-///=============================================================================
-///Instr is assumed to be a vector instruction. For most vector instructions,
-///the size of the destination vector register gives the number of scalar copies
-///needed. For VecStore, size of getOperand(1) gives the number of scalar copies
-///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the
-///number of scalar copies needed.
-///=============================================================================
-unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
- unsigned numDefs=0;
- unsigned def;
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
-
- if (!oper.isReg()) continue;
- if (!oper.isDef()) continue;
- def = i;
- numDefs++;
- }
- assert((numDefs <= 1) && "Only 0 or 1 defs supported");
-
- if (numDefs == 1) {
- unsigned regnum = Instr->getOperand(def).getReg();
- if (ISVECEXTRACT(Instr))
- regnum = Instr->getOperand(1).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- else if (numDefs == 0) {
- assert(ISVECSTORE(Instr)
-           && "The only 0-def instruction supported is vector store");
-
- unsigned regnum = Instr->getOperand(0).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- return 1;
-}
-
-const TargetRegisterClass *VectorElementize::
-getScalarRegClass(const TargetRegisterClass *RC) {
- assert(isNVPTXVectorRegClass(RC) &&
- "Not a vector register class");
- return getNVPTXElemClass(RC);
-}
-
-bool VectorElementize::isVectorRegister(unsigned reg) {
- const TargetRegisterClass *RC=MRI->getRegClass(reg);
- return isNVPTXVectorRegClass(RC);
-}
-
-///=============================================================================
-///For every vector register 'v' that is not already in the VectorToScalarMap,
-///create n scalar registers of the corresponding element type, where n
-///is 2 or 4 (getNVPTXVectorSize), and add them to VectorToScalarMap.
-///=============================================================================
-SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
- assert(isVectorRegister(regnum) && "Expecting a vector register here");
- // Create the scalar registers and put them in the map, if not already there.
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
- const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
- const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
-
- SmallVector<unsigned, 4> temp;
-
- for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
- temp.push_back(MRI->createVirtualRegister(scalarClass));
-
- VectorToScalarMap[regnum] = temp;
- }
- return VectorToScalarMap[regnum];
-}
-
-///=============================================================================
-///For a vector load of the form
-///va <= ldv2 [addr]
-///the following multi-output instruction is created:
-///[v1, v2] <= LD [addr]
-///Look at NVPTXVector.td for the definitions of multi output loads.
-///=============================================================================
-void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, that should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
- }
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-
-}
-
-///=============================================================================
-///For a vector store of the form
-///stv2 va, [addr]
-///the following multi-input instruction is created:
-///ST v1, v2, [addr]
-///Look at NVPTXVector.td for the definitions of multi input stores.
-///=============================================================================
-void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- MachineOperand src = copy->getOperand(0);
- unsigned regnum = src.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///va <= shufflev2 vb, vc, <i1>, <i2>
-///gets converted to 2 moves into a1 and a2. The sources of the moves depend on
-///i1 and i2. i1 and i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
-///shufflev4 the set is {0, ..., 7}. For example, if i1=3, i2=0, the move
-///instructions will be
-///a1 <= c2
-///a2 <= b1
-///=============================================================================
-void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned src1regnum = Instr->getOperand(1).getReg();
- unsigned src2regnum = Instr->getOperand(2).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
- SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
- MachineOperand which=Instr->getOperand(3+i);
- assert(which.isImm() && "Shuffle operand not a constant");
-
- int src=which.getImm();
- int elem=src%numcopies;
-
- if (which.getImm() < numcopies)
- copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
- else
- copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
- copies.push_back(copy);
- }
-}
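// A small sketch of the index arithmetic above (selectShuffleSource is an
// illustrative name): an index below numcopies selects from the first
// source vector, otherwise from the second; elem = index % numcopies in
// both cases. For v2 with i1=3, i2=0 this gives a1 <= c2 (src2[1]) and
// a2 <= b1 (src1[0]), matching the comment before createVecShuffle.
struct ShuffleSrc {
  bool FromSecond;  // true selects src2, false selects src1
  unsigned Elem;    // element index within the chosen source
};

static ShuffleSrc selectShuffleSource(unsigned Index, unsigned NumCopies) {
  ShuffleSrc S;
  S.FromSecond = Index >= NumCopies;
  S.Elem = Index % NumCopies;
  return S;
}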
-
-///=============================================================================
-///a <= extractv2 va, <i1>
-///gets turned into a simple move to the scalar register a. The source depends
-///on i1.
-///=============================================================================
-void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which = Instr->getOperand(2);
- assert(which.isImm() && "Extract operand not a constant");
-
- DebugLoc DL = Instr->getDebugLoc();
-
- MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- Instr->getOperand(0).getReg());
- copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));
-
- copies.push_back(copy);
-}
-
-///=============================================================================
-///va <= vecinsertv2 vb, c, <i1>
-///This instruction copies all elements of vb to va, except the 'i1'th element.
-///The scalar value c becomes the 'i1'th element of va.
-///This gets translated to 2 (4 for vecinsertv4) moves.
-///=============================================================================
-void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which=Instr->getOperand(3);
- assert(which.isImm() && "Insert operand not a constant");
- unsigned int elem=which.getImm();
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- if (i != elem)
- copy->addOperand(MachineOperand::CreateReg(src[i], false));
- else
- copy->addOperand(Instr->getOperand(2));
-
- copies.push_back(copy);
- }
-
-}
-
-///=============================================================================
-///va <= buildv2 b1, b2
-///gets translated to
-///a1 <= b1
-///a2 <= b2
-///=============================================================================
-void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- copy->addOperand(Instr->getOperand(1+i));
-
- copies.push_back(copy);
- }
-
-}
-
-///=============================================================================
-///For a tex inst of the form
-///va <= op [scalar operands]
-///the following multi-output instruction is created:
-///[v1, v2] <= op' [scalar operands]
-///=============================================================================
-void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, that should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///Look at the vector instruction type and dispatch to the createVec<...>
-///function that creates the scalar copies.
-///=============================================================================
-void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- if (ISVECLOAD(Instr)) {
- createLoadCopy(F, Instr, copies);
- return;
- }
- if (ISVECSTORE(Instr)) {
- createStoreCopy(F, Instr, copies);
- return;
- }
- if (ISVECSHUFFLE(Instr)) {
- createVecShuffle(F, Instr, copies);
- return;
- }
- if (ISVECEXTRACT(Instr)) {
- createVecExtract(F, Instr, copies);
- return;
- }
- if (ISVECINSERT(Instr)) {
- createVecInsert(F, Instr, copies);
- return;
- }
- if (ISVECDEST(Instr)) {
- createVecDest(F, Instr, copies);
- return;
- }
- if (ISVECBUILD(Instr)) {
- createVecBuild(F, Instr, copies);
- return;
- }
-
- unsigned numcopies=numCopiesNeeded(Instr);
-
- for (unsigned i=0; i<numcopies; ++i)
- copies.push_back(F.CloneMachineInstr(Instr));
-
- for (unsigned i=0; i<numcopies; ++i) {
- MachineInstr *copy = copies[i];
-
- std::vector<MachineOperand> allOperands;
- std::vector<bool> isDef;
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
- MachineOperand oper = copy->getOperand(j);
- allOperands.push_back(oper);
- if (oper.isReg())
- isDef.push_back(oper.isDef());
- else
- isDef.push_back(false);
- }
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
- copy->RemoveOperand(0);
-
- copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
-
- for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
- MachineOperand oper=allOperands[j];
- if (oper.isReg()) {
- unsigned regnum = oper.getReg();
- if (isVectorRegister(regnum)) {
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
- }
- else
- copy->addOperand(oper);
- }
- else
- copy->addOperand(oper);
- }
- }
-}
-
-///=============================================================================
-///Scan through all basic blocks, looking for vector instructions.
-///For each vector instruction I, insert the scalar copies before I, and
-///add I to the toRemove vector. Finally, remove all instructions in toRemove.
-///=============================================================================
-void VectorElementize::elementize(MachineFunction &F) {
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
- BI!=BE; ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- std::vector<MachineInstr *> copies;
- std::vector<MachineInstr *> toRemove;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
- II!=IE; ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isVectorInstr(Instr))
- continue;
-
- copies.clear();
- createCopies(F, Instr, copies);
- for (unsigned i=0, e=copies.size(); i!=e; ++i)
- BB->insert(II, copies[i]);
-
- assert((copies.size() > 0) && "Problem in createCopies");
- toRemove.push_back(Instr);
- }
- for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
- F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
- }
-}
-
-///=============================================================================
-///a <= b
-///...
-///...
-///x <= op(a, ...)
-///gets converted to
-///
-///x <= op(b, ...)
-///The original move is still present. This works on SSA form machine code.
-///Note that a <= b should be a simple vreg-to-vreg move instruction.
-///TBD: I didn't find a function that can do replaceOperand, so I remove
-///all operands and add all of them again, replacing the one while adding.
-///=============================================================================
-unsigned VectorElementize::copyProp(MachineFunction &F) {
- unsigned numReplacements = 0;
-
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- // Don't do copy propagation on PHI as it will cause unnecessary
- // live range overlap.
- if ((Instr->getOpcode() == TargetOpcode::PHI) ||
- (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
- continue;
-
- bool needsReplacement = false;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- if (!oper.isReg()) continue;
- if (oper.isDef()) continue;
- if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
-
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
-
- if (!defInstr) continue;
-
- if (!isSimpleMove(defInstr)) continue;
-
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!defSrc.isReg()) continue;
- if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
-
- needsReplacement = true;
-
- }
- if (!needsReplacement) continue;
-
- numReplacements++;
-
- std::vector<MachineOperand> operands;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- bool flag = false;
- do {
- if (!(oper.isReg()))
- break;
- if (oper.isDef())
- break;
- if (!(RegInfo->isVirtualRegister(oper.getReg())))
- break;
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
- if (!(isSimpleMove(defInstr)))
- break;
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!(defSrc.isReg()))
- break;
- if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
- break;
- operands.push_back(defSrc);
- flag = true;
- } while (0);
- if (flag == false)
- operands.push_back(oper);
- }
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
- Instr->RemoveOperand(0);
- for (unsigned i=0, e=operands.size(); i!=e; ++i)
- Instr->addOperand(operands[i]);
-
- }
- }
- return numReplacements;
-}
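// Minimal sketch of the rewrite copyProp performs on SSA machine code
// (illustrative types; MoveSrc maps the destination vreg of each simple
// move to its source vreg). A use is redirected through at most one move
// per invocation; the outer driver loop reruns it to a fixed point.
#include <map>

static unsigned propagateOnce(unsigned UseReg,
                              const std::map<unsigned, unsigned> &MoveSrc) {
  std::map<unsigned, unsigned>::const_iterator It = MoveSrc.find(UseReg);
  // For 'a <= b' followed by 'x <= op(a, ...)', a use of a becomes b.
  return It == MoveSrc.end() ? UseReg : It->second;
}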
-
-///=============================================================================
-///Look for simple vreg-to-vreg moves whose destinations have no uses and add
-///them to the deadMoves vector. Then remove all instructions in deadMoves.
-///=============================================================================
-unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
- std::vector<MachineInstr *> deadMoves;
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isSimpleMove(Instr)) continue;
-
- MachineOperand dest = Instr->getOperand(0);
- assert(dest.isReg() && "dest of move not a register");
- assert(RegInfo->isVirtualRegister(dest.getReg()) &&
- "dest of move not a virtual register");
-
- if (MRI->use_empty(dest.getReg())) {
- deadMoves.push_back(Instr);
- }
- }
- }
-
- for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
- F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
-
- return deadMoves.size();
-}
-
-///=============================================================================
-///Main function for this pass.
-///=============================================================================
-bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
- MRI = &F.getRegInfo();
-
- RegInfo = TM.getRegisterInfo();
- InstrInfo = TM.getInstrInfo();
-
- VectorToScalarMap.clear();
-
- elementize(F);
-
- if (RemoveRedundantMoves)
- while (1) {
- if (copyProp(F) == 0) break;
- removeDeadMoves(F);
- }
-
- return true;
-}
-
-FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
- return new VectorElementize(tm);
-}
-
-unsigned VectorElementize::getScalarVersion(unsigned opcode) {
- if (opcode == NVPTX::PHI)
- return opcode;
- if (opcode == NVPTX::IMPLICIT_DEF)
- return opcode;
- switch(opcode) {
- default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
- case TargetOpcode::COPY: return TargetOpcode::COPY;
- case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
- case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
- case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
- case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
- case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
- case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
- case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
- case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
- case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
- case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
- case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
- case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
- case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
- case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
- case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
- case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
- case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
- case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
- case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
- case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
- case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
- case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
- case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
- case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
- case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
- case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
- case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
- case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
- case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
- case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
- case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
- case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
- case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
- case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
- case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
- case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
- case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
- case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
- case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
- case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
- case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
- case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
- case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
- case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
- case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
- case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
- case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
- case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
- case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
- case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
- case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
- case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
- case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
- case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
- case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
- case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
- case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
- case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
- case NVPTX::VNegV2I16: return NVPTX::INEG16;
- case NVPTX::VNegV2I32: return NVPTX::INEG32;
- case NVPTX::VNegV2I64: return NVPTX::INEG64;
- case NVPTX::VNegV2I8: return NVPTX::INEG8;
- case NVPTX::VNegV4I16: return NVPTX::INEG16;
- case NVPTX::VNegV4I32: return NVPTX::INEG32;
- case NVPTX::VNegV4I8: return NVPTX::INEG8;
- case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
- case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNotV2I16: return NVPTX::NOT16;
- case NVPTX::VNotV2I32: return NVPTX::NOT32;
- case NVPTX::VNotV2I64: return NVPTX::NOT64;
- case NVPTX::VNotV2I8: return NVPTX::NOT8;
- case NVPTX::VNotV4I16: return NVPTX::NOT16;
- case NVPTX::VNotV4I32: return NVPTX::NOT32;
- case NVPTX::VNotV4I8: return NVPTX::NOT8;
- case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
- case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
- case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
- case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
- case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
- case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
- case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
- case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
- case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
- case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
- case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
- case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
- case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
- case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
- case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
- case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
- case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
- case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
- case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
- case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
- case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
- case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
- case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
- case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
- case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
- case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
- case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
- case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
- case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
- case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
- case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
- case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
- case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
- case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
- case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
- case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
-
- case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
- case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
- case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
- case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
- case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
- case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
- case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
- case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
- case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
- case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
- case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
- case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
- case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
- case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
- case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
- case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
- case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
- case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
- case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
- case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
- case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
- case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
- case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
- case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
- case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
- case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
- case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
- case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
- case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
- case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
- case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
- case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
- case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
- case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
- case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
- case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
-
- case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
- case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
- case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
- case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
- case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
- case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
- case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
- case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
-
- case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
- case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
- case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
- case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
- case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
- case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
- case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
- case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
-
- case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
- case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
- case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
- case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
- case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
- case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
- case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
- case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
-
- case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
- case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
- case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
- case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
- case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
- case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
- case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
- case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
-
- case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
- case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
- case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
- case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
- case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
- case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
- case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
- case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
-
- case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
- case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
- case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
- case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
- case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
- case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
- case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
- case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
-
- case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
- case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
- case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
- case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
- case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
- case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
- case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
- case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
-
- case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
- case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
- case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
- case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
- case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
- case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
- case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
- case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
-
- case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
- case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
- case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
- case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
- case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
- case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
- case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
- case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
-
- case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
- case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
- case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
- case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
- case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
- case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
- case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
- case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
- }
- return 0;
-}
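The deleted runOnMachineFunction drove cleanup to a fixed point: it repeated
copy propagation until no replacement was made, sweeping dead moves after
each round. A minimal standalone sketch of that shape (runToFixedPoint and
the function-pointer parameters are illustrative assumptions):

template <typename MF>
void runToFixedPoint(MF &F, unsigned (*copyProp)(MF &),
                     unsigned (*removeDeadMoves)(MF &)) {
  // Each propagation round can strand moves whose results become unused;
  // the dead-move sweep deletes them before the next round.
  while (copyProp(F) != 0)
    removeDeadMoves(F);
}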
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index a7347efd7850..45cc0b8b67f2 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -24,22 +24,21 @@ enum {
CLK_LUMINANCE = 0x10B9
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- ,
+ ,
CLK_Rx = 0x10BA,
CLK_RGx = 0x10BB,
CLK_RGBx = 0x10BC
#endif
};
-
typedef enum clk_channel_type {
// valid formats for float return types
- CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
- CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
- CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
- CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
- CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
- CLK_FLOAT = 0x10DE, // four channel RGBA float
+ CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+ CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+ CLK_FLOAT = 0x10DE, // four channel RGBA float
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
#endif
// valid only for integer return types
- CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT8 = 0x10D7,
CLK_SIGNED_INT16 = 0x10D8,
CLK_SIGNED_INT32 = 0x10D9,
CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@ typedef enum clk_channel_type {
CLK_UNSIGNED_INT32 = 0x10DC,
// CI SPI for CPU
- __CLK_UNORM_INT8888 , // four channel ARGB unorm8
- __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+ __CLK_UNORM_INT8888, // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
__CLK_VALID_IMAGE_TYPE_COUNT,
__CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
- __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
- // represent any image type
- __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
typedef enum clk_sampler_type {
- __CLK_ADDRESS_BASE = 0,
- CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
- __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
- CLK_ADDRESS_CLAMP_TO_EDGE |
- CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
- __CLK_ADDRESS_BITS = 3, // number of bits required to
- // represent address info
-
- __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
- CLK_NORMALIZED_COORDS_FALSE = 0,
- CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
- __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
- CLK_NORMALIZED_COORDS_TRUE,
- __CLK_NORMALIZED_BITS = 1, // number of bits required to
- // represent normalization
-
- __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
- __CLK_NORMALIZED_BITS,
- CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
- CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
- CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
- __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
- CLK_FILTER_ANISOTROPIC,
- __CLK_FILTER_BITS = 2, // number of bits required to
- // represent address info
-
- __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
- CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
- CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
- CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
- __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
- CLK_MIP_ANISOTROPIC,
- __CLK_MIP_BITS = 2,
-
- __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
- __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
- __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
- __CLK_ANISOTROPIC_RATIO_BITS = 5,
- __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
- (__CLK_ANISOTROPIC_RATIO_BITS-1)
+ __CLK_ADDRESS_MASK =
+ CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK =
+ CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK =
+ CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent address info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK =
+ (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
} clk_sampler_type;
// Memory synchronization
-#define CLK_LOCAL_MEM_FENCE (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
#endif // __CL_COMMON_DEFINES_H__
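
The reformatted sampler enum above is a packed bitfield: bits [0,3) hold the address mode, bit 3 the normalized-coordinates flag, bits [4,6) the filter mode, with the mip and anisotropy fields stacked above them. A minimal standalone sketch of composing and decoding a sampler word (plain C++, with the relevant constants copied by hand rather than taken from the header, so treat the values as illustrative):

#include <cstdio>

// Field layout copied from cl_common_defines.h: address mode in bits [0,3),
// normalized-coords flag in bit 3, filter mode in bits [4,6).
enum {
  CLK_ADDRESS_CLAMP_TO_EDGE  = 2,       // 2 << __CLK_ADDRESS_BASE (0)
  CLK_NORMALIZED_COORDS_TRUE = 1 << 3,  // 1 << __CLK_NORMALIZED_BASE
  CLK_FILTER_LINEAR          = 1 << 4,  // 1 << __CLK_FILTER_BASE
};

int main() {
  unsigned Sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE |
                     CLK_FILTER_LINEAR;
  printf("sampler    = 0x%x\n", Sampler);            // 0x1a
  printf("address    = %u\n", Sampler & 0x7);        // 2
  printf("normalized = %u\n", (Sampler >> 3) & 1);   // 1
  printf("filter     = %u\n", (Sampler >> 4) & 0x3); // 1
}

The *_BASE/*_BITS pairs in the header exist so that each field's shift amount is derived from the widths of the fields below it, which keeps the layout consistent if a field ever grows.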
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
deleted file mode 100644
index ed0666823124..000000000000
--- a/lib/Target/NVPTX/gen-register-defs.py
+++ /dev/null
@@ -1,202 +0,0 @@
-#!/usr/bin/env python
-
-num_regs = 396
-
-outFile = open('NVPTXRegisterInfo.td', 'w')
-
-outFile.write('''
-//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class NVPTXReg<string n> : Register<n> {
- let Namespace = "NVPTX";
-}
-
-class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
- : RegisterClass <"NVPTX", regTypes, alignment, regList>;
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
-
-// Special Registers used as the stack
-def VRDepot : NVPTXReg<"%Depot">;
-''')
-
-# Predicates
-outFile.write('''
-//===--- Predicate --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
-
-# Int8
-outFile.write('''
-//===--- 8-bit ------------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
-
-# Int16
-outFile.write('''
-//===--- 16-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
-
-# Int32
-outFile.write('''
-//===--- 32-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
-
-# Int64
-outFile.write('''
-//===--- 64-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
-
-# F32
-outFile.write('''
-//===--- 32-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
-
-# F64
-outFile.write('''
-//===--- 64-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
-
-# Vector registers
-outFile.write('''
-//===--- Vector -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
-
-for i in range(0, num_regs):
- outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
-
-# Argument registers
-outFile.write('''
-//===--- Arguments --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
-
-outFile.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('''
-// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 192d18d66440..6036428fad93 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 3d583060d1ef..bacc108c62b4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
- if (!Modifier) {
- unsigned CCReg = MI->getOperand(OpNo+1).getReg();
- unsigned RegNo;
- switch (CCReg) {
- default: llvm_unreachable("Unknown CR register");
- case PPC::CR0: RegNo = 0; break;
- case PPC::CR1: RegNo = 1; break;
- case PPC::CR2: RegNo = 2; break;
- case PPC::CR3: RegNo = 3; break;
- case PPC::CR4: RegNo = 4; break;
- case PPC::CR5: RegNo = 5; break;
- case PPC::CR6: RegNo = 6; break;
- case PPC::CR7: RegNo = 7; break;
- }
-
- // Print the CR bit number. The Code is ((BI << 5) | BO) for a
- // BCC, but we must have the positive form here (BO == 12)
- unsigned BI = Code >> 5;
- assert((Code & 0xF) == 12 &&
- "BO in predicate bit must have the positive form");
-
- unsigned Value = 4*RegNo + BI;
- O << Value;
- return;
- }
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
case PPC::PRED_EQ: O << "eq"; return;
@@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc' or 'reg' as predicate op modifier!");
- // Don't print the register for 'always'.
- if (Code == PPC::PRED_ALWAYS) return;
printOperand(MI, OpNo+1, O);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 87ecb13a4c76..ec2657403e0c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
@@ -30,11 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case PPC::fixup_ppc_toc:
+ case PPC::fixup_ppc_tlsreg:
+ case PPC::fixup_ppc_nofixup:
return Value;
- case PPC::fixup_ppc_lo14:
- case PPC::fixup_ppc_toc16_ds:
- return (Value & 0xffff) << 2;
case PPC::fixup_ppc_brcond14:
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
@@ -46,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_ha16:
return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
return Value & 0xffff;
+ case PPC::fixup_ppc_lo16_ds:
+ return Value & 0xfffc;
}
}
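
The ha16 case above is the usual high-adjusted computation: the low 16 bits are later sign-extended by the consuming instruction, so the high half must be rounded up whenever bit 15 of the value is set, while the new fixup_ppc_lo16_ds case clears the two implied zero bits of a DS-form displacement. A self-contained sketch checking the round-trip (the addis/addi reassembly is the standard pattern; the sample address is arbitrary):

#include <cassert>
#include <cstdint>
#include <cstdio>

// ha16 rounds up when the low half is negative as a signed 16-bit value, so
// that (ha16(x) << 16) plus the sign-extended lo16(x) reproduces x exactly.
static uint32_t ha16(uint64_t Value) {
  return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
}
static uint32_t lo16(uint64_t Value) { return Value & 0xffff; }

int main() {
  uint64_t Addr = 0x10008FFC; // low half 0x8FFC is negative as int16_t
  uint32_t Hi = ha16(Addr), Lo = lo16(Addr);
  // Reassemble the address the way an addis/addi pair would at run time.
  int64_t Reassembled = (int64_t)(Hi << 16) + (int16_t)Lo;
  assert((uint64_t)Reassembled == Addr);
  printf("ha16 = 0x%x, lo16 = 0x%x\n", Hi, Lo);
}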
@@ -61,7 +60,9 @@ public:
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {}
+ MCValue Target, uint64_t &FixedValue) {
+ llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+ }
};
class PPCAsmBackend : public MCAsmBackend {
@@ -78,10 +79,9 @@ public:
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_lo16", 16, 16, 0 },
{ "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo14", 16, 14, 0 },
- { "fixup_ppc_toc", 0, 64, 0 },
- { "fixup_ppc_toc16", 16, 16, 0 },
- { "fixup_ppc_toc16_ds", 16, 14, 0 }
+ { "fixup_ppc_lo16_ds", 16, 14, 0 },
+ { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -92,6 +92,20 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
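
The applyFixup hoisted into the base class writes the adjusted value into the fragment one byte at a time, most significant byte first, matching PPC's big-endian instruction encoding; OR-ing instead of assigning preserves the opcode bits already present. A standalone sketch of the same loop (the 'bl' encoding and the br24 mask are assumptions for illustration only):

#include <cstdint>
#include <cstdio>

// Mirror of the loop in PPCAsmBackend::applyFixup: OR a 32-bit fixup value
// into four big-endian bytes starting at Offset.
static void applyFixup32(uint8_t *Data, unsigned Offset, uint32_t Value) {
  for (unsigned i = 0; i != 4; ++i)
    Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1) * 8)) & 0xff);
}

int main() {
  uint8_t Insn[4] = {0x48, 0x00, 0x00, 0x01}; // 'bl' with an empty LI field
  applyFixup32(Insn, 0, 0x1234 & 0x3fffffc);  // branch offset, pre-masked
  for (uint8_t B : Insn)
    printf("%02x ", (unsigned)B);             // prints: 48 00 12 35
  printf("\n");
}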
@@ -99,7 +113,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
@@ -135,11 +149,6 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- llvm_unreachable("UNIMP");
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -161,19 +170,6 @@ namespace {
ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
PPCAsmBackend(T), OSABI(OSABI) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- Value = adjustFixupValue(Fixup.getKind(), Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
- }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
deleted file mode 100644
index 9c975c089ea6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PPC target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCBASEINFO_H
-#define PPCBASEINFO_H
-
-#include "PPCMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index dc93f7124a52..84e4175e635b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -33,9 +34,25 @@ namespace {
const MCFixup &Fixup,
bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+
+ class PPCELFRelocationEntry : public ELFRelocationEntry {
+ public:
+ PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+ bool operator<(const PPCELFRelocationEntry &RE) const {
+ return (RE.r_offset < r_offset ||
+ (RE.r_offset == r_offset && RE.Type > Type));
+ }
};
}
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+ : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+ RE.r_addend, *RE.Fixup) {}
+
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
@@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_br24:
Type = ELF::R_PPC_REL24;
break;
+ case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
}
} else {
switch ((unsigned)Fixup.getKind()) {
@@ -79,9 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
Type = ELF::R_PPC_TPREL16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
}
break;
case PPC::fixup_ppc_lo16:
@@ -90,22 +127,56 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
Type = ELF::R_PPC_TPREL16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
}
break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case PPC::fixup_ppc_toc:
- Type = ELF::R_PPC64_TOC;
+ case PPC::fixup_ppc_lo16_ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ }
break;
- case PPC::fixup_ppc_toc16:
- Type = ELF::R_PPC64_TOC16;
+ case PPC::fixup_ppc_tlsreg:
+ Type = ELF::R_PPC64_TLS;
break;
- case PPC::fixup_ppc_toc16_ds:
- Type = ELF::R_PPC64_TOC16_DS;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ Type = ELF::R_PPC64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ Type = ELF::R_PPC64_TLSLD;
+ break;
+ }
break;
case FK_Data_8:
switch (Modifier) {
@@ -162,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
switch ((unsigned)Fixup.getKind()) {
case PPC::fixup_ppc_ha16:
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
- case PPC::fixup_ppc_toc16_ds:
+ case PPC::fixup_ppc_lo16_ds:
RelocOffset += 2;
break;
default:
@@ -171,6 +241,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
}
}
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset. Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+
+ // Copy to a temporary vector of relocation entries having a different
+ // sort function.
+ std::vector<PPCELFRelocationEntry> TmpRelocs;
+
+ for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+ R != Relocs.end(); ++R) {
+ TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+ }
+
+ // Sort in place by ascending r_offset and descending r_type.
+ array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+ // Copy back to the original vector.
+ unsigned I = 0;
+ for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+ R != TmpRelocs.end(); ++R, ++I) {
+ Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+ R->Symbol, R->r_addend, *R->Fixup);
+ }
+}
+
+
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI) {
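
The comparison in PPCELFRelocationEntry::operator< reads reversed because the other entry arrives as the argument: A < B holds when A's offset is smaller, or when the offsets tie and A's type is larger, yielding ascending r_offset with descending type as the tie-break. The same ordering in a standalone sketch (the struct is a stand-in for ELFRelocationEntry with only the two fields the sort consults; the type numbers are illustrative):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Reloc {
  uint64_t Offset;
  unsigned Type;
};

int main() {
  std::vector<Reloc> Relocs = {{8, 38}, {0, 67}, {0, 68}};
  // Ascending r_offset, descending type on ties, matching the PPC sorter.
  std::stable_sort(Relocs.begin(), Relocs.end(),
                   [](const Reloc &A, const Reloc &B) {
                     return A.Offset < B.Offset ||
                            (A.Offset == B.Offset && A.Type > B.Type);
                   });
  for (const Reloc &R : Relocs)
    printf("offset=%llu type=%u\n", (unsigned long long)R.Offset, R.Type);
}

With two relocations at offset 0, the higher-numbered type is emitted first on every run, which is exactly the determinism the comment above is after.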
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 37b265e7fd38..86c44f57a5e2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
@@ -31,19 +33,16 @@ enum Fixups {
/// like 'lis'.
fixup_ppc_ha16,
- /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
- /// like 'std'.
- fixup_ppc_lo14,
-
- /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
- fixup_ppc_toc,
+ /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_lo16_ds,
- /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
- fixup_ppc_toc16,
+ /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+ fixup_ppc_tlsreg,
- /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
- /// implied 2 zero bits
- fixup_ppc_toc16_ds,
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models.
+ fixup_ppc_nofixup,
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 215aa40c4afd..a25d7fe64f3a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -17,8 +17,9 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
PCSymbol = ".";
@@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 21183024a509..2223cd623cb5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,15 +12,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -31,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
Triple TT;
public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : STI(sti), TT(STI.getTargetTriple()) {
+ : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
}
~PPCMCCodeEmitter() {}
- bool is64BitMode() const {
- return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
- }
-
- bool isSVR4ABI() const {
- return TT.isMacOSX() == 0;
- }
-
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -61,6 +56,8 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -77,11 +74,11 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
- // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the
- // following 'nop'.
+ // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF)
+ if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+ Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
Size = 8;
// Output the constant in big endian byte order.
@@ -114,6 +111,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
+
+ // For special TLS calls, add another fixup for the symbol. Apparently
+ // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
+ // similar that TblGen will not generate a separate case for the latter
+ // two, so this is the only way to get the extra fixup generated.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
+ const MCOperand &MO2 = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ }
return 0;
}
@@ -162,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
return RegBits;
}
@@ -183,17 +187,26 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
- // Add a fixup for the branch target.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16_ds));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo14));
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16_ds));
return RegBits;
}
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_tlsreg));
+ return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
+}
+
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -202,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
MI.getOpcode() == PPC::MFOCRF ||
MI.getOpcode() == PPC::MTCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
@@ -214,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 4c2578d5dc53..2209f936ec33 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "PPCMCTargetDesc.h"
-#include "PPCMCAsmInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "PPCMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
+ if (CM == CodeModel::Default) {
+ Triple T(TT);
+ if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+ CM = CodeModel::Medium;
+ }
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index a0e4cf3005f2..38a7420d972d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,6 +14,9 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -44,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a143406..d84eb9c6aa03 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
switch (Opcode) {
- default: llvm_unreachable("Unknown PPC branch opcode!");
case PPC::PRED_EQ: return PPC::PRED_NE;
case PPC::PRED_NE: return PPC::PRED_EQ;
case PPC::PRED_LT: return PPC::PRED_GE;
@@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_NU: return PPC::PRED_UN;
case PPC::PRED_UN: return PPC::PRED_NU;
}
+ llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index f872e861bfa7..ad2b01812816 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,11 +14,17 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
enum Predicate {
- PRED_ALWAYS = (0 << 5) | 20,
PRED_LT = (0 << 5) | 12,
PRED_LE = (1 << 5) | 4,
PRED_EQ = (2 << 5) | 12,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 9103e1232505..446b6854fb5b 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
-#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -25,6 +24,7 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
+ class ImmutablePass;
class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
@@ -37,6 +37,9 @@ namespace llvm {
JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
+
+  /// \brief Creates a PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
@@ -53,25 +56,32 @@ namespace llvm {
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 4,
+ MO_PIC_FLAG = 2,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 8,
+ MO_NLP_FLAG = 4,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 16,
+ MO_NLP_HIDDEN_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 224,
+ MO_ACCESS_MASK = 0xf0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 32, MO_HA16 = 64,
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
- MO_TPREL16_HA = 96,
- MO_TPREL16_LO = 128
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
};
} // end namespace PPCII
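
The renumbering above replaces ad-hoc decimal constants with a clean split: the low four bits stay independent OR-able flags, and the high nibble (MO_ACCESS_MASK = 0xf0) becomes an enumerated field written as n << 4. A small sketch of combining and decoding such an operand flag word (constant values copied from the enum above; the test program itself is illustrative):

#include <cstdio>

enum {
  MO_PIC_FLAG    = 2,     // independent flag bit
  MO_ACCESS_MASK = 0xf0,  // enumerated field in the high nibble
  MO_LO16        = 1 << 4,
  MO_HA16        = 2 << 4,
  MO_TPREL16_HA  = 3 << 4,
};

int main() {
  unsigned Flags = MO_PIC_FLAG | MO_TPREL16_HA;
  // Flag bits are tested individually; the access kind is extracted whole.
  printf("pic?        %d\n", (Flags & MO_PIC_FLAG) != 0);    // 1
  printf("access kind %u\n", (Flags & MO_ACCESS_MASK) >> 4); // 3 (TPREL16_HA)
}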
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index cb15dadb7e99..389216278ee4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E500mc", "">;
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
@@ -52,12 +57,42 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
// Register File Description
@@ -73,30 +108,46 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
-def : Processor<"603", G3Itineraries, [Directive603]>;
-def : Processor<"603e", G3Itineraries, [Directive603]>;
-def : Processor<"603ev", G3Itineraries, [Directive603]>;
-def : Processor<"604", G3Itineraries, [Directive604]>;
-def : Processor<"604e", G3Itineraries, [Directive604]>;
-def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"750", G4Itineraries, [Directive750]>;
-def : Processor<"g3", G3Itineraries, [Directive750]>;
-def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"970", G5Itineraries,
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"g5", G5Itineraries,
+def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
Feature64Bit /*, Feature64BitRegs */]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
@@ -104,23 +155,65 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, FeatureISEL,
- Feature64Bit
- /*, Feature64BitRegs */]>;
-def : Processor<"pwr6", G5Itineraries,
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"pwr7", G5Itineraries,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
-def : Processor<"ppc64", G5Itineraries,
+def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 15d690bd8970..96a9f0a39006 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,14 +18,13 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,28 +32,30 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -72,6 +73,7 @@ namespace {
return "PowerPC Assembly Printer";
}
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
virtual void EmitInstruction(const MachineInstr *MI);
@@ -309,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
}
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it. If not, create one. Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+ MCSymbol *&TOCEntry = TOC[Sym];
+
+  // To avoid a name clash, check if the name already exists.
+ while (TOCEntry == 0) {
+ if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == 0) {
+ TOCEntry = GetTempSymbol("C", TOCLabelID);
+ }
+ }
+
+ return TOCEntry;
+}
+
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
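
lookUpOrCreateTOCEntry exploits the fact that TOC[Sym] default-constructs the mapped value on first access, so the returned reference doubles as a presence check, and the loop only retries when a candidate label name is already taken in the output context. A simplified stand-in for the pattern using std::map (the .LC label scheme here is illustrative, not the real private-global-prefix logic):

#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, std::string> TOC;
static unsigned TOCLabelID = 0;

// Return the TOC label for Sym, creating one on first use; repeated symbols
// share a single entry, just as repeated MCSymbols share one TOC slot.
static const std::string &lookUpOrCreateTOCEntry(const std::string &Sym) {
  std::string &Entry = TOC[Sym];
  if (Entry.empty())
    Entry = ".LC" + std::to_string(TOCLabelID++);
  return Entry;
}

int main() {
  printf("%s\n", lookUpOrCreateTOCEntry("foo").c_str()); // .LC0
  printf("%s\n", lookUpOrCreateTOCEntry("bar").c_str()); // .LC1
  printf("%s\n", lookUpOrCreateTOCEntry("foo").c_str()); // .LC0 again
}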
@@ -349,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
-
-
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
- Create(PICBase, OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
@@ -382,14 +399,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- MCSymbol *&TOCEntry = TOC[MOSymbol];
- // To avoid name clash check if the name already exists.
- while (TOCEntry == 0) {
- if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == 0) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
@@ -399,15 +410,299 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDIS8. If the global address is external,
+ // has common linkage, is a function address, or is a jump table
+ // address, then generate a TOC entry and reference that. Otherwise
+ // reference the symbol directly.
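+    // For example, a TOC-indirect reference to @sym typically expands to
+    //   addis 3, 2, .LC0@toc@ha
+    // (where .LC0 holds the address of sym), while a local symbol can be
+    // addressed relative to the TOC directly:
+    //   addis 3, 2, sym@toc@ha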
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+ bool IsCommon = false;
+ bool IsAvailExt = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsCommon = GVar && RealGValue->hasCommonLinkage();
+ IsFunction = !GVar;
+ IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD. If the global address is external, has
+ // common linkage, or is a jump table address, then reference the
+ // associated TOC entry. Otherwise reference the symbol directly.
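+    // The TOC-indirect form typically pairs with ADDIStocHA above, e.g.:
+    //   ld 3, .LC0@toc@l(3)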
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = 0;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise reference the
+ // symbol directly.
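+    // e.g. the low half of a direct TOC-relative address:
+    //   addi 3, 3, sym@toc@l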
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsFunction = !GVar;
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+
+ if (IsFunction || IsExternal)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %X2, sym@got@tprel@ha
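+    // First half of the initial-exec sequence; combined with LDgotTprelL,
+    // the emitted code is roughly:
+    //   addis 3, 2, sym@got@tprel@ha
+    //   ld    3, sym@got@tprel@l(3)
+    //   add   3, 3, sym@tls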
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL: {
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR: {
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
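+    // This is the final call of the general-dynamic sequence; the full
+    // pattern emitted for a GD access of @sym is roughly:
+    //   addis 3, 2, sym@got@tlsgd@ha
+    //   addi  3, 3, sym@got@tlsgd@l
+    //   bl    __tls_get_addr(sym@tlsgd)
+    //   nop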
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL: {
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR: {
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
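+    // Final call of the local-dynamic sequence, emitted roughly as:
+    //   addis 3, 2, sym@got@tlsld@ha
+    //   addi  3, 3, sym@got@tlsld@l
+    //   bl    __tls_get_addr(sym@tlsld)
+    //   nop
+    // with ADDISdtprelHA/ADDIdtprelL then applying each symbol's dtprel
+    // offset to the returned module base.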
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA: {
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL: {
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
+    OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ?
+                                              PPC::MFCR8 : PPC::MFCR)
+      .addReg(MI->getOperand(0).getReg()));
return;
case PPC::SYNC:
// In Book E sync is called msync, handle this special case here...
@@ -438,14 +733,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -474,6 +769,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+                            isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -508,7 +822,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppcA2",
"ppce500mc",
"ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
"power6",
+ "power6x",
"power7",
"ppc64"
};
@@ -523,8 +842,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -549,16 +871,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
- TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
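+  // For example, "_foo$stub" becomes "_foo$lazy_ptr".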
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
- TmpStr += "$tmp";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
@@ -589,32 +908,51 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- // FIXME: MCize this.
- OutStreamer.EmitRawText(StringRef("\tmflr r0"));
- OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
+ // mflr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
- OutStreamer.EmitRawText(StringRef("\tmflr r11"));
- OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
- "-" + AnonSymbol->getName() + ")");
- OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
-
+ // mflr r11
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+ Anon, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(Sub));
+ // mtlr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(Sub).addExpr(Sub)
+ .addReg(PPC::R11));
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
return;
@@ -634,23 +972,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ const MCExpr *LazyPtrLo16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+ OutContext);
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
@@ -703,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -713,7 +1070,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -732,7 +1089,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 21a0fb200f20..bd1c37868110 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -17,21 +17,27 @@
#define DEBUG_TYPE "ppc-branch-select"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
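+// INITIALIZE_PASS below defines initializePPCBSelPass; it is forward-declared
+// here and invoked from the constructor so the pass is registered with the
+// global PassRegistry before it is first used.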
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -45,6 +51,9 @@ namespace {
char PPCBSel::ID = 0;
}
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
/// Pass
///
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2a2abb171fb1..81a54d7015b0 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -31,20 +31,20 @@
#define DEBUG_TYPE "ctrloops"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -54,6 +54,10 @@ using namespace llvm;
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct PPCCTRLoops : public MachineFunctionPass {
@@ -64,7 +68,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- PPCCTRLoops() : MachineFunctionPass(ID) {}
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -174,15 +180,32 @@ namespace {
};
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
 /// isCompareEqualsImm - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -341,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -368,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
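+    // The LIS that materializes the high halfword is the definition of the
+    // ORI's source register (operand 1, not the destination).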
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -387,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -405,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -477,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -582,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -635,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -652,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 3f87e883b1e4..c8a29a3d2cfe 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
@@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[
//===----------------------------------------------------------------------===//
-// PowerPC Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-/*
-def CC_PPC : CallingConv<[
- // The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
- CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
-
- // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64],
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
-
- // The first 12 Vector arguments are passed in altivec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
-
-/*
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>*/
-]>;
-
-*/
-
-//===----------------------------------------------------------------------===//
-// PowerPC System V Release 4 ABI
+// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
-def CC_PPC_SVR4_Common : CallingConv<[
+def CC_PPC32_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
- CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
- CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
// FP values are passed in F1 - F8.
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
@@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
-def CC_PPC_SVR4_VarArg : CallingConv<[
- CCDelegateTo<CC_PPC_SVR4_Common>
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
-// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
-// vector arguments in vector registers before putting them on the stack.
-def CC_PPC_SVR4 : CallingConv<[
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCDelegateTo<CC_PPC_SVR4_Common>
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// Helper "calling convention" to handle aggregate by value arguments.
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[
// Still, the address of the aggregate copy in the callers stack frame is passed
// in a GPR (or in the parameter list area if all GPRs are allocated) from the
// caller to the callee. The location for the address argument is assigned by
-// the CC_PPC_SVR4 calling convention.
+// the CC_PPC32_SVR4 calling convention.
//
-// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
-def CC_PPC_SVR4_ByVal : CallingConv<[
+def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
@@ -164,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 252a2d159ec3..64787185138b 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -12,15 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCTargetMachine.h"
-#include "PPCRelocations.h"
#include "PPC.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -68,6 +68,7 @@ namespace {
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -141,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
}
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
@@ -252,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index caf7bf2be793..3244b904ee64 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "PPCFrameLowering.h"
-#include "PPCInstrInfo.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -115,16 +116,25 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(I->first);
+ unsigned RegNo = TRI->getEncodingValue(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
}
// If no registers are used, turn this into a copy.
@@ -179,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -198,13 +226,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
- // still generate stackless code if all local vars are reg-allocated.
- // Try: (FrameSize <= 224
- // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
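+  // (With the check below, 32-bit SVR4 therefore takes the stackless path
+  // only when FrameSize == 0.)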
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -213,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -231,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -240,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -261,7 +295,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -270,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -300,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -769,14 +828,15 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -801,29 +861,24 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -831,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -895,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -914,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -948,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
- getPPCRegisterNumbering(MinG8R));
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
@@ -1009,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF))
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
}
bool
@@ -1046,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// save slot via GPR12 (available in the prolog for 32- and 64-bit).
if (Subtarget.isPPC64()) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
.addReg(PPC::X12,
getKillRegState(true))
.addImm(8)
@@ -1087,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (isPPC64) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
.addImm(8)
.addReg(PPC::X1));
RestoreOp = PPC::MTCRF8;
@@ -1103,15 +1198,56 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
if (CR3Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
if (CR4Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
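+      // A signed 16-bit amount fits a single addi; larger amounts must
+      // first be materialized into a scratch register with a lis/ori pair.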
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
}
bool
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 4d957b91c7bb..6f5f9368c6c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,9 +15,9 @@
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
namespace llvm {
class PPCSubtarget;
@@ -27,11 +27,14 @@ class PPCFrameLowering: public TargetFrameLowering {
public:
PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -40,16 +43,23 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -139,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 6ed1fb9e6a3c..4bf1e3396429 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) {
}
// Do not allow MTCTR and BCTRL to be in the same dispatch group.
- if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ if (HasCTRSet && Opcode == PPC::BCTRL)
return NoopHazard;
// If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 254fea67fc4e..95efc11b53c1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,24 +14,30 @@
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
@@ -46,7 +52,9 @@ namespace {
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {}
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -59,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -110,10 +120,10 @@ namespace {
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
- /// immediate field. Because preinc imms have already been validated, just
- /// accept it.
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
@@ -122,18 +132,6 @@ namespace {
return false;
}
- /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
- /// index field. Because preinc imms have already been validated, just
- /// accept it.
- bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
- N.getOpcode() == ISD::TargetGlobalAddress)
- return false;
-
- Out = N;
- return true;
- }
-
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which is preferred.
@@ -154,6 +152,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -1040,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
SDValue Offset = LD->getOffset();
- if (isa<ConstantSDNode>(Offset) ||
+ if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
@@ -1107,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[] = { Offset, Base, Chain };
+ SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
@@ -1268,11 +1272,277 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
+ case PPCISD::TOC_ENTRY: {
+ assert(PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a
+ // TargetJumpTable. If it is an externally defined symbol, a symbol
+ // with common linkage, a function address, or a jump table address,
+ // or if we are generating code for the large code model, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GValue = G->getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ assert((GVar || isa<Function>(RealGValue)) &&
+ "Unexpected global value subclass!");
+
+ // An external variable is one without an initializer. For these,
+ // for variables with common or available-externally linkage, and
+ // for Functions, generate the LDtocL form.
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
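
The case analysis above reduces to a small predicate: symbols whose addresses must be loaded out of the TOC (jump tables, function addresses, variables without an initializer, common or available-externally linkage, and everything under the large code model) take the indirect LDtocL form, while only variables with a local definition can use the cheaper ADDItocL offset from the TOC base. A standalone restatement (plain C++; the struct is an illustrative stand-in, not an LLVM type):

#include <cassert>

// Illustrative classification mirroring the TOC_ENTRY selection above:
// which symbols need the indirect LDtocL form (a load from the TOC)
// rather than ADDItocL (a direct offset from the TOC base)?
struct SymInfo {
  bool IsJumpTable;
  bool IsFunction;
  bool HasInitializer;       // false => externally defined variable
  bool HasCommonOrAvailExt;  // common / available_externally linkage
};

static bool needsTOCLoad(const SymInfo &S, bool LargeCodeModel) {
  if (LargeCodeModel || S.IsJumpTable || S.IsFunction)
    return true;
  return !S.HasInitializer || S.HasCommonOrAvailExt;
}

int main() {
  // An ordinary defined variable can use ADDItocL under the medium model.
  assert(!needsTOCLoad({false, false, true, false}, false));
  // An external variable's address must be loaded from the TOC.
  assert(needsTOCLoad({false, false, false, false}, false));
  // Everything goes through the TOC load under the large code model.
  assert(needsTOCLoad({false, false, true, false}, true));
  return 0;
}
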
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
}
return SelectCode(N);
}
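
The three VADD_SPLAT expansions rest on arithmetic identities over the signed 5-bit range [-16,15] that VSPLTIS[BHW] can splat: for even elt, elt/2 + elt/2 == elt; for odd elt in [17,31], (elt-16) - (-16) == elt; and for odd elt in [-31,-17], (elt+16) + (-16) == elt. A standalone exhaustive check of those identities (plain C++; the vector instructions appear only as comments):

#include <cassert>

// VSPLTIS[BHW] can splat any immediate in [-16,15]. Verify that each
// VADD_SPLAT expansion uses only such immediates and reproduces Elt.
static bool inSplatRange(int V) { return V >= -16 && V <= 15; }

int main() {
  for (int Elt = -32; Elt <= 31; ++Elt) {
    if ((Elt & 1) == 0) {
      // tmp = vspltis(Elt >> 1); vaddu_m tmp, tmp
      assert(inSplatRange(Elt >> 1) && (Elt >> 1) + (Elt >> 1) == Elt);
    } else if (Elt > 0) {
      // tmp1 = vspltis(Elt - 16); tmp2 = vspltis(-16); vsubu_m tmp1, tmp2
      assert(inSplatRange(Elt - 16) && (Elt - 16) - (-16) == Elt);
    } else {
      // tmp1 = vspltis(Elt + 16); tmp2 = vspltis(-16); vaddu_m tmp1, tmp2
      assert(inSplatRange(Elt + 16) && (Elt + 16) + (-16) == Elt);
    }
  }
  return 0;
}
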
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
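
Two conditions gate the fold above: the memory operand's existing displacement must be zero (it is about to be replaced by the add's immediate), and for the DS-form instructions LWA, LD, and STD the incoming immediate must keep its two low bits clear, because DS-form encodes the displacement divided by 4. A standalone sketch of that legality check (plain C++; the enum is an illustrative stand-in for the opcode lists above):

#include <cassert>

// Illustrative stand-ins for the opcodes handled above.
enum MemOpKind { DForm, DSForm /* LWA, LD, STD */ };

// Folding (memop 0, (addi base, imm)) -> (memop imm, base) is legal
// for D-form at any displacement, but DS-form displacements must be
// a multiple of 4 because the low two bits are not encoded.
static bool canFoldAddImm(MemOpKind Kind, long Imm) {
  if (Kind == DSForm && Imm % 4 != 0)
    return false;
  return true;
}

int main() {
  assert(canFoldAddImm(DForm, 6));    // lbz/lhz/stw etc.: fine
  assert(canFoldAddImm(DSForm, 8));   // ld/std/lwa: aligned, fine
  assert(!canFoldAddImm(DSForm, 6));  // ld/std/lwa: would drop low bits
  return 0;
}
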
/// createPPCISelDag - This pass converts a legalized DAG into a
@@ -1282,3 +1552,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index adf78d5233ae..16fc8a0e3726 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12,15 +12,10 @@
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -36,20 +36,20 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -112,6 +116,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -132,11 +137,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
@@ -144,26 +151,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT()) {
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- }
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
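
The fast-math-only caveat for frin above exists because frin rounds halfway cases away from zero, while FNEARBYINT must honor the default IEEE round-to-nearest, ties-to-even mode. The same contrast is visible on any IEEE-754 host (a standalone illustration, not PPC-specific):

#include <cassert>
#include <cmath>

int main() {
  // round() rounds halfway cases away from zero, like frin.
  assert(std::round(2.5) == 3.0);
  assert(std::round(-2.5) == -3.0);

  // nearbyint() in the default rounding mode rounds ties to even,
  // which is what FNEARBYINT is expected to implement.
  assert(std::nearbyint(2.5) == 2.0);
  assert(std::nearbyint(-2.5) == -2.0);
  return 0;
}
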
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
@@ -206,6 +245,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling, but rather a lightweight setjmp/longjmp
+ // replacement to support continuations, user-level threading, and so on.
+ // As a result, no other SjLj exception interfaces are implemented;
+ // please do not build your own exception handling on top of them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -285,15 +332,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -347,6 +407,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
@@ -361,6 +436,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -373,12 +449,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
- for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- setOperationAction(ISD::FSQRT, VT, Expand);
- }
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -393,6 +463,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
@@ -401,6 +475,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -429,6 +509,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -449,6 +531,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
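
These FDIV/FSQRT combines are what later replace divisions and square roots with the FRE/FRSQRTE estimate instructions refined by Newton-Raphson iteration, which is acceptable only under UnsafeFPMath because the refined result need not be correctly rounded. A standalone sketch of the refinement scheme (plain C++; the crude starting values stand in for the hardware estimates):

#include <cmath>
#include <cstdio>

// One Newton-Raphson step for r ~= 1/x:  r' = r * (2 - x * r).
static double refineRecip(double X, double R) { return R * (2.0 - X * R); }

// One Newton-Raphson step for r ~= 1/sqrt(x):
//   r' = r * (1.5 - 0.5 * x * r * r).
static double refineRSqrt(double X, double R) {
  return R * (1.5 - 0.5 * X * R * R);
}

int main() {
  double X = 3.0;
  double R = 0.3;            // stand-in for the fre estimate of 1/3
  for (int I = 0; I < 4; ++I)
    R = refineRecip(X, R);   // error roughly squares each step
  std::printf("1/3 ~= %.17g\n", R);

  double S = 0.5;            // stand-in for the frsqrte estimate
  for (int I = 0; I < 4; ++I)
    S = refineRSqrt(X, S);
  std::printf("1/sqrt(3) ~= %.17g (libm: %.17g)\n", S, 1.0 / std::sqrt(3.0));
  return 0;
}
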
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -482,15 +570,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// friends. Gcc uses the same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
}
}
@@ -521,6 +608,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
@@ -536,16 +625,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -555,13 +641,25 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
@@ -995,7 +1093,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1044,7 +1142,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1107,7 +1205,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1145,15 +1243,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
@@ -1161,7 +1263,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
@@ -1172,6 +1292,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -1308,19 +1432,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
- return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1654,18 +1840,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1688,11 +1874,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1815,7 +2001,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1876,7 +2062,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -2068,13 +2254,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -2409,6 +2598,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
@@ -2995,7 +3187,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3128,8 +3320,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3231,7 +3426,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
@@ -3247,17 +3442,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
@@ -3279,7 +3463,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -3290,9 +3474,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
@@ -3401,11 +3585,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
@@ -3418,7 +3602,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
@@ -3429,7 +3613,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -4323,14 +4507,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -4355,12 +4533,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4466,6 +4649,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4553,37 +4751,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
- SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
- if (Op.getValueType() == MVT::i32)
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
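
The 4-byte bias in the non-i32Stack path reflects PPC's big-endian layout: after storing the f64 bit pattern, the 32-bit integer produced by fctiwz sits in the high-address half of the slot, while with stfiwx (the i32Stack path) the integer word is stored directly and no bias is needed. A standalone illustration of the layout assumption (plain C++, with the big-endian byte order written out by hand so it runs on any host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A 64-bit value as fctiwz would leave it: result in the low word.
  uint64_t Converted = 0xFFFFFFFF80000001ULL; // low word: 0x80000001

  // Store it to a "stack slot" in big-endian byte order, as PPC would.
  unsigned char Slot[8];
  for (int I = 0; I < 8; ++I)
    Slot[I] = static_cast<unsigned char>(Converted >> (56 - 8 * I));

  // On big-endian, the low 32 bits live at byte offset 4 -- hence the
  // DAG.getConstant(4, ...) bias when loading an i32 from the f64 slot.
  uint32_t LowWord = 0;
  for (int I = 0; I < 4; ++I)
    LowWord = (LowWord << 8) | Slot[4 + I];
  assert(LowWord == 0x80000001u);
  return 0;
}
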
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
@@ -4597,6 +4830,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4630,44 +4864,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
+
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
- "Unhandled SINT_TO_FP type in custom expander!");
+ "Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
- // STD the extended value into the stack slot.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 8, 8);
- SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
- SDValue Store =
- DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
- Ops, 4, MVT::i64, MMO);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
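
The low-bit twiddling retained above for the no-FPCVT f32 path guards against double rounding: converting i64 to f64 with fcfid and then rounding f64 to f32 can disagree with a single correctly rounded i64-to-f32 conversion. A standalone demonstration (plain C++; assumes IEEE-754 with round-to-nearest-even, as on typical hosts):

#include <cassert>
#include <cstdint>

int main() {
  // 2^53 + 2^29 + 1: not exactly representable in f64, and just above
  // the f32 tie point between 2^53 and 2^53 + 2^30.
  int64_t X = (INT64_C(1) << 53) + (INT64_C(1) << 29) + 1;

  float Direct = static_cast<float>(X);  // one correctly rounded step
  float Twice = static_cast<float>(static_cast<double>(X)); // via f64

  // i64->f64 rounds to 2^53 + 2^29 (ties to even), which then lands
  // exactly on the f32 tie and rounds down to 2^53; the direct
  // conversion rounds up to 2^53 + 2^30.
  assert(Direct == 9007200328482816.0f); // 2^53 + 2^30
  assert(Twice == 9007199254740992.0f);  // 2^53
  assert(Direct != Twice);
  return 0;
}
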
@@ -4697,12 +4956,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
- NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Glue); // unused in this context
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
@@ -4936,11 +5196,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5036,23 +5306,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
@@ -5326,9 +5579,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<EVT> VTs;
- VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -5470,11 +5721,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
@@ -5528,50 +5783,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
- // This sequence changes FPSCR to do round-to-zero, adds the two halves
- // of the long double, and puts FPSCR back the way it was. We do not
- // actually model FPSCR.
- std::vector<EVT> NodeTys;
- SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
- NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
- Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
- MFFSreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(31, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(30, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = Lo;
- Ops[1] = Hi;
- Ops[2] = InFlag;
- Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
- FPreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64);
- Ops[0] = DAG.getConstant(1, MVT::i32);
- Ops[1] = MFFSreg;
- Ops[2] = FPreg;
- Ops[3] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
- FPreg = Result.getValue(0);
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
@@ -5663,7 +5876,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5767,7 +5980,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -5782,9 +5995,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and the stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
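+
+ // Sketch of the resulting layout (pointer-sized slots; slots 0 and 2 are
+ // filled in by Clang before this intrinsic runs, as noted above):
+ //   buf[0]  frame address
+ //   buf[1]  resume IP (LR)    <- LabelOffset
+ //   buf[2]  stack pointer
+ //   buf[3]  TOC pointer (R2)  <- TOCOffset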
+
+ // Prepare the IP in a register.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but not otherwise referenced, it is
+ // treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the function being jumped back into may not have had a
+ // frame pointer; if it did not, its r31 will still be restored as
+ // necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -5802,24 +6244,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- // The SelectPred is ((BI << 5) | BO) for a BCC
- unsigned BO = SelectPred & 0xF;
- assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
- unsigned TrueOpNo, FalseOpNo;
- if (BO == 12) {
- TrueOpNo = 2;
- FalseOpNo = 3;
- } else {
- TrueOpNo = 3;
- FalseOpNo = 2;
- SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
}
BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(TrueOpNo).getReg())
- .addReg(MI->getOperand(FalseOpNo).getReg())
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ .addReg(MI->getOperand(SwapOps ? 3 : 2).getReg())
+ .addReg(MI->getOperand(SwapOps ? 2 : 3).getReg())
+ .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6052,7 +6494,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6155,6 +6597,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
.addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6167,6 +6678,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton's iteration for a function F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i).
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
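+
+// A minimal scalar model of the refinement loop above (an illustrative
+// sketch, not part of the lowering):
+//   double RefineRecip(double A, double Est, int N) {
+//     for (int I = 0; I < N; ++I)
+//       Est = Est + Est * (1.0 - A * Est);  // == Est * (2 - A * Est)
+//     return Est;
+//   }
+// Squaring the error each pass, the ~2^-5 architected estimate reaches
+// ~2^-40 after three iterations (beyond f32 precision) and ~2^-80 after
+// the extra f64 iteration; the ~2^-14 hasRecipPrec() estimate needs only
+// one (~2^-28) and two (~2^-56), respectively.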
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton's iteration for a function F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i).
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
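+
+// The analogous scalar model (sketch): with HalfA == A / 2, formed once
+// above as 1.5*A - A so that only the one FP constant is needed, each
+// pass computes
+//   Est = Est * (1.5 - HalfA * Est * Est);
+// and the iteration counts follow the same accuracy argument as for the
+// reciprocal above.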
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -6193,7 +6837,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+ }
+ break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -6240,8 +6949,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
- N->getOperand(2), N->getOperand(3));
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -6251,7 +6967,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16)) {
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -6272,7 +6991,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@@ -6283,8 +7005,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
- DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
- LD->getMemoryVT(), LD->getMemOperand());
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -6384,14 +7107,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
- VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
@@ -6543,6 +7264,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6727,13 +7451,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be deferred until PEI runs.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6754,16 +7481,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// lowering. If DstAlign is zero, that means it is safe for the destination
/// alignment to satisfy any constraint. Similarly, if SrcAlign is zero, there
/// is no need to check it against an alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
@@ -6773,6 +7499,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned ones, it is usually more efficient than manual expansion, and it
+ // generally traps to software emulation only when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
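+
+// Under this policy, for example, an unaligned i32 or f64 access is left
+// alone (and reported fast), while vector and ppcf128 accesses fall back
+// to the generic expansion.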
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b3c7f9c28d40..7157b70d8622 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,9 +16,10 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace PPCISD {
@@ -35,14 +36,21 @@ namespace llvm {
/// was temporarily in the f64 operand.
FCFID,
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ, FCTIWZ,
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
@@ -90,17 +98,10 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
- /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
- /// registers.
- EXTSW_32,
-
/// CALL - A direct function call.
- /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
- CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
-
- /// NOP - Special NOP which follows 64-bit SVR4 calls.
- NOP,
+ CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -108,7 +109,7 @@ namespace llvm {
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
- BCTRL_Darwin, BCTRL_SVR4,
+ BCTRL,
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -119,6 +120,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -138,26 +145,13 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
- // The following 5 instructions are used only as part of the
- // long double-to-int conversion sequence.
-
- /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
- /// register.
- MFFS,
-
- /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
- MTFSB0,
-
- /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
- MTFSB1,
-
- /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
- /// rounding towards zero. It has flags added so it won't move past the
- /// FPSCR-setting instructions.
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
FADDRTZ,
- /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
- MTFSF,
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
/// LARX = This corresponds to PPC l{w|d}arx instruction: load and
/// reserve indexed. This is used to implement atomic operations.
@@ -178,20 +172,111 @@ namespace llvm {
CR6SET,
CR6UNSET,
- /// STD_32 - This is the STD instruction for use with "32-bit" registers.
- STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym@got@tprel@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym@got@tprel@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
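+ /// For illustration, an initial-exec access built from the three nodes
+ /// above looks like (a sketch; register numbers are arbitrary):
+ ///   addis 9, 2, sym@got@tprel@ha    (ADDIS_GOT_TPREL_HA)
+ ///   ld    9, sym@got@tprel@l(9)     (LD_GOT_TPREL_L)
+ ///   add   9, 9, sym@tls             (ADD_TLS; the @tls operand stands
+ ///                                    for the thread pointer, r13)
+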
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsgd@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsgd@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsld@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsld@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@dtprel@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
- STBRX,
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
/// then puts it in the bottom bits of the GPRC. TYPE can be either i16
/// or i32.
- LBRX
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
+ ADDIS_TOC_HA,
+
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ LD_TOC_L,
+
+ /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+ /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+ /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ ADDI_TOC_L
};
}
@@ -241,6 +326,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -249,7 +335,7 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual EVT getSetCCResultType(EVT VT) const;
@@ -315,6 +401,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -358,18 +450,21 @@ namespace llvm {
/// lowering. If DstAlign is zero, that means it is safe for the destination
/// alignment to satisfy any constraint. Similarly, if SrcAlign is zero, there
/// is no need to check it against an alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and if so, is
+ /// it fast relative to software emulation?
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -415,7 +510,7 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -525,6 +620,12 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9711452ec46a..fa5b65f0ba2d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,8 +30,12 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
+ let MIOperandInfo = (ops i64imm:$imm);
}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+}
+def tlsgd : Operand<i64> {}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -62,123 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
let Defs = [LR8] in
def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
- }
- let Uses = [CTR8, RM] in {
- def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
-// ELF 64 ABI Calls = Darwin ABI Calls
-// Used to define BL8_ELF and BLA8_ELF
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
- def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
- let isCodeGenOnly = 1 in
- def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
- [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
+ [(PPCcall_nop (i64 imm:$func))]>;
}
- let Uses = [X11, CTR8, RM] in {
- def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
}
}
// Calls
-def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
- (BL8_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
- (BL8_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_NOP_ELF tglobaladdr:$dst)>;
-
-def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
- (BL8_ELF texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
- (BL8_NOP_ELF texternalsym:$dst)>;
-
-def : Pat<(PPCnop),
- (NOP)>;
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set G8RC:$dst,
- (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
- [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
}
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", LdStLDARX,
- [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
"stdcx. $rS, $dst", LdStSTDCX,
- [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -197,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
- isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
- let isReturn = 1 in {
- def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
- }
-
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
-}
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
@@ -223,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+}
+
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -232,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
- let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
- }
-}
-// 64-but CR instructions
+// 64-bit CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let isCodeGenOnly = 1 in
def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
"#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -254,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -262,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -279,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
- [(set G8RC:$result,
- (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
@@ -302,126 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
- [(set G8RC:$rD, immSExt16:$imm)]>;
+ [(set i64:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
- [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
// Logical ops with immediate.
def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
+def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB@tls", IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+ [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+ [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"neg $rT, $rA", IntSimple,
- [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+ [(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
}
def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhd $rT, $rA, $rB", IntMulHW,
- [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhdu $rT, $rA, $rB", IntMulHWU,
- [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
"cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
@@ -434,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"sld $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srd $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
let Defs = [CARRY] in {
def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srad $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
-/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
"sradi $rA, $rS, $SH", IntRotateDI,
- [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
"cntlzd $rA, $rS", IntGeneral,
- [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also performs a population count on the high 32 bits (storing the
+// result in the high 32 bits of the output). We ignore that here, which is
+// safe because we never separately use the high part of the 64-bit registers.
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
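+// Example (sketch): with rS = 0x0000_0003_0000_000F, popcntw yields
+// 0x0000_0002_0000_0004 -- each 32-bit word is counted independently,
+// and codegen relies only on the low word here.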
def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divd $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divdu $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulld $rT, $rA, $rB", IntMulHD,
- [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let isCommutable = 1 in {
@@ -512,7 +516,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -527,94 +531,96 @@ def ISEL8 : AForm_4<31, 15,
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -624,25 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtoc",
- [(set G8RC:$rD,
- (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocJTI",
- [(set G8RC:$rD,
- (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocCPT",
- [(set G8RC:$rD,
- (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
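+// Under the small code model a TOC entry fits a single 16-bit-displacement
+// load, e.g. (sketch; g is a hypothetical global): ld r3, g@toc(r2).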
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
- [(PPCload_toc G8RC:$reg)]>, isPPC64;
+ [(PPCload_toc i64:$reg)]>, isPPC64;
let RST = 2, DS = 10, RA = 1 in
def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
@@ -651,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
- [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
-
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -671,118 +683,168 @@ def : Pat<(PPCload ixaddr:$src),
def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
+// Support for the medium and large code models.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDIStocHA",
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
+ isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+ "#LDtocL",
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDItocL",
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
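+// These expand to the usual two-instruction sequence, e.g. (sketch; g is a
+// hypothetical global):
+//   addis r3, r2, g@toc@ha         (ADDIStocHA)
+//   ld    r3, g@toc@l(r3)          (LDtocL; ADDItocL uses addi instead
+//                                   for locally defined data)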
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
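+// As a sketch (x is a hypothetical TLS variable), general-dynamic accesses
+// lower to:
+//   addis r3, r2, x@got@tlsgd@ha   (ADDIStlsgdHA)
+//   addi  r3, r3, x@got@tlsgd@l    (ADDItlsgdL)
+//   bl    __tls_get_addr(x@tlsgd)  (GETtlsADDR)
+// with the local-dynamic pseudos following the same shape.
+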
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
- [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
-let PPC970_Unit = 2 in {
-
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
-
-// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
-def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
- "std $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
-def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
- "stdx $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
- PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
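+// E.g. (sketch) "stbu r4, 1(r5)" stores the low byte of r4 at r5+1 and
+// writes the updated effective address back into r5, matching pre_truncsti8.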
//===----------------------------------------------------------------------===//
@@ -793,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
"fcfid $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
"fctidz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfidu $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfids $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfidus $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiduz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwuz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
@@ -805,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
//
// Extensions and truncates to/from 32-bit regs.
-def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
0, 32)>;
-def : Pat<(i64 (anyext GPRC:$in)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
-def : Pat<(i32 (trunc G8RC:$in)),
- (EXTRACT_SUBREG G8RC:$in, sub_32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
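+// The zext pattern works because rldicl with SH=0, MB=32 masks away bits
+// 0-31 (the high word); e.g. (sketch) "rldicl r3, r3, 0, 32" keeps only
+// the low 32 bits of r3.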
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -838,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src),
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
// amounts.
-def : Pat<(sra G8RC:$rS, GPRC:$rB),
- (SRAD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(srl G8RC:$rS, GPRC:$rB),
- (SRD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(shl G8RC:$rS, GPRC:$rB),
- (SLD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
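+// The difference matters because sld/srd/srad consume a 7-bit shift amount
+// (a count of 64 is meaningful; sld/srd then produce 0), whereas the
+// target-independent nodes assume counts below 64; the PPCshl/PPCsrl/PPCsra
+// nodes above capture the 7-bit case.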
// SHL/SRL
-def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
- (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
-def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
// ROTL
-def : Pat<(rotl G8RC:$in, GPRC:$sh),
- (RLDCL G8RC:$in, GPRC:$sh, 0)>;
-def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, imm:$imm, 0)>;
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
@@ -866,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
- (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
- (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS8 G8RC:$in, tconstpool:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS8 G8RC:$in, tjumptable:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS8 G8RC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
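+// Sketch: for something like "load i64, i64* %p, align 2" the DS-form LD is
+// unusable (its displacement is implicitly a multiple of 4), so the X-form
+// LDX is selected instead; likewise LWAX and STDX.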
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ba58c3e4ac88..a5ba4c8aebef 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,34 +158,75 @@ def vecspltisw : PatLeaf<(build_vector), [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
}], VSPLTISW_get_imm>;
-def V_immneg0 : PatLeaf<(build_vector), [{
- return PPC::isAllNegativeZeroVector(N);
-}]>;
-
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
-// VA1a_Int - A VAForm_1a intrinsic definition.
-class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
: VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
-// VX1_Int - A VXForm_1 intrinsic definition.
-class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
-// VX2_Int - A VXForm_2 intrinsic definition.
-class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
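+
+// With these helpers a typed intrinsic becomes a one-liner; e.g. VADDCUW
+// below is VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>,
+// which expands to the pattern
+// [(set v4i32:$vD, (int_ppc_altivec_vaddcuw v4i32:$vA, v4i32:$vB))].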
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -217,129 +258,136 @@ def DSTST64 : DSS_Form<374, (outs),
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
"mfvscr $vD", LdStStore,
- [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
"mtvscr $vB", LdStLoad,
- [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
"lvebx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
"lvehx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
"lvewx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
"lvx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
"lvxl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
"lvsl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
"lvsr $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
"stvebx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
"stvehx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
"stvewx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
"stvx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
"stvxl $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
- (fneg VRRC:$vB))))]>;
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
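+// VNMSUBFP computes -(vA*vC - vB); the pattern writes this as
+// fneg(fma(vA, vC, fneg(vB))), e.g. with vB = 0 the result is -(vA*vC).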
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
-def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
-def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
-def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
-def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
-def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP,
- [(set VRRC:$vD,
- (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddfp $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddubm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
-def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
-def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
-def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
-def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
-def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
-def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vand $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vandc $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
- (vnot_ppc VRRC:$vB)))]>;
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfux $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctsxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctuxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -347,203 +395,237 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
let VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfsx $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
"vctuxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfux $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
"vctsxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
-def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
-def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
-
-def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
-def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
-def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
-def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
-def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
-def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
-
-def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
-def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
-def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
-def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
-def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
-def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
-def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
-def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
-def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
-def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
-def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
-def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
-def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
-def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
-
-def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
-def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
-def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
-def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
-def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
-def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
-
-def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
-def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
-def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
-def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
-def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
-def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
-def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
-def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
-def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
-def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
-def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
-def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
-def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
-def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
-def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubfp $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsububm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
-def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
-def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
-def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
-def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
-def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
-def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
-def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
-def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
-def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
-def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
- VRRC:$vB)))]>;
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vxor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
-def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
-def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
-def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
-def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
-def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
-def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
-def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
-def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
-def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
-def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
-def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
-def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
-def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
-def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
-def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisb $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltish $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisw $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
-def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
-def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
-def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
-def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
-def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
// Vector Unpack.
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
-def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
-def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
-def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
-def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
-def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
@@ -582,10 +664,16 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+let isCodeGenOnly = 1 in
def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
- [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
+} // VALU Operations.
//===----------------------------------------------------------------------===//
// Additional Altivec Patterns
@@ -596,31 +684,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
// * 32-bit
-def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
// * 64-bit
-def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
// Stores.
-def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
- (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
// Bit conversions.
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -642,82 +730,99 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
-def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
- (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
-def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUWUM VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
// Match vmrg*(x,x)
-def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLW VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
-def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
- (VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
- (VANDC VRRC:$A, VRRC:$B)>;
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
-def : Pat<(fmul VRRC:$vA, VRRC:$vB),
- (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
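+// Note: VSLW of an all-ones vector by itself shifts each 0xFFFFFFFF word left
+// by 31 bits, producing a splat of 0x80000000, i.e. -0.0f; using -0.0 rather
+// than +0.0 (V_SET0) as the addend preserves the sign of zero results.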
// Fused multiply-add and multiply-subtract for packed float. These are
// represented separately from the real instructions above, for operations that
// must have the additional precision, such as Newton-Raphson (used by divide, sqrt).
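// A sketch of one Newton-Raphson reciprocal refinement step built from these
// nodes (assuming `one` is a vector splat of 1.0):
//   x1 = x0 + x0*(1 - a*x0)  ==  (VMADDFP x0, (VNMSUBFP a, x0, one), x0)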
-def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
-def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
-def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
- (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
// Vector shifts
-def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
-def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
- (VCTSXS_0 VRRC:$vA)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
- (VCTUXS_0 VRRC:$vA)>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
- (VCFSX_0 VRRC:$vA)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
- (VCFUX_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21fc..400b7e367bfe 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
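+// BD above is the 14-bit signed branch displacement; branch targets are
+// word-aligned, so the two low-order bits are implied zeroes and not encoded.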
+
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- string cstr, InstrItinClass itin, list<dag>pattern>
+ InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
- let Constraints = cstr;
let Inst{6} = 0;
let Inst{7-14} = FM;
@@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
bits<5> RT;
bits<5> RA;
bits<5> RB;
- bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
- bits<3> CR;
+ bits<5> COND;
let Pattern = pattern;
let Inst{6-10} = RT;
let Inst{11-15} = RA;
let Inst{16-20} = RB;
- let Inst{21-23} = CR;
- let Inst{24-25} = BIBO{6-5};
+ let Inst{21-25} = COND;
let Inst{26-30} = xo;
let Inst{31} = 0;
}
@@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
let PPC64 = 0;
let Pattern = pattern;
let Inst{31-0} = 0;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9d68446f536..69c54ed084be 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
+#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -28,16 +29,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -60,7 +55,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::LD:
case PPC::LWZ:
case PPC::LFS:
case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::LVX:
+ case PPC::RESTORE_VRSAVE:
+    // Check for the operands added by addFrameReference (the immediate is the
+    // offset, which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::STD:
case PPC::STW:
case PPC::STFS:
case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::STVX:
+ case PPC::SPILL_VRSAVE:
+    // Check for the operands added by addFrameReference (the immediate is the
+    // offset, which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -141,7 +148,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI)
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
@@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use X11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(PPC::X11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
@@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,25 +577,17 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- DestReg), FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
- FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::X11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
@@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
- case PPC::BL8_NOP_ELF:
- case PPC::BLA8_NOP_ELF:
+ case PPC::BL8_NOP:
+ case PPC::BLA8_NOP:
return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea435b..635e3480b06d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6ee045a2c7c9..ab907622beeb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,32 +57,36 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
-def SDT_PPCnop : SDTypeProfile<0, 0, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
-def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-// This sequence is used for long double->int conversions. It changes the
-// bits in the FPSCR which is not modelled.
-def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
- [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
- [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
- SDTCisVT<3, f64>]>,
- [SDNPInGlue]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -91,6 +99,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -99,10 +121,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
-def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
-def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
- [SDNPHasChain, SDNPMayStore]>;
-
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -110,16 +128,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -130,13 +144,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -144,6 +154,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -167,6 +185,12 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to support the medium and large code models
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
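+// For example (sketch), a medium-code-model global access becomes:
+//   addis rT, r2, sym@toc@ha      ; ADDIS_TOC_HA
+//   ld    rT, sym@toc@l(rT)       ; LD_TOC_L (load the address from the TOC)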
+
+
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -258,6 +282,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, and LDU) that require
+// restricted memrix (offset/4) constants are alignment-sensitive. If these
+// offsets are hidden behind TOC entries, then the values of the lower-order
+// bits cannot be checked directly. As a result, we also need to incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
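+// For example (sketch), a DS-form load pattern can then be limited to
+// suitably aligned accesses:
+//   def : Pat<(i64 (aligned4load ixaddr:$src)), (LD ixaddr:$src)>;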
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -294,9 +350,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -324,26 +377,37 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
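+// (In D-form and X-form addressing, an RA field of 0 reads as the literal
+// value 0 rather than the contents of r0, so base-address operands must come
+// from a class that excludes r0.)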
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
-// that doesn't matter.
-def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
- (ops (i32 20), (i32 zero_reg))> {
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
}
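// ($bibo packs the BI and BO branch fields; BO=20 encodes "branch always", in
// which case the CR register operand is a dummy.)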
// Define PowerPC specific addressing mode.
@@ -352,9 +416,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -381,17 +448,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
- [(set GPRC:$result,
- (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
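// A sketch of the resulting control flow (the custom inserter emits the
// actual blocks):
//   bc <BROPC>, $cond, LtrueBB   ; branch if the CR condition holds
//   $dst = $F;  b LdoneBB
// LtrueBB:   $dst = $T
// LdoneBB: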
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
- def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+ GPRC_NOR0:$T, GPRC_NOR0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+ G8RC_NOX0:$T, G8RC_NOX0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
@@ -418,10 +490,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
- "b${p:cc}lr ${p:reg}", BrB,
- [(retflag)]>;
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ [(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
}
@@ -453,46 +524,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
- }
- let Uses = [CTR, RM] in {
- def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
}
}
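+// (BO=20 encodes "branch always", so the BI field (31) is never tested; with
+// LK=1 the instruction still writes LR, which the setjmp sequence relies on.)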
-// SVR4 ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB,
- [(PPCcall_SVR4 (i32 imm:$func))]>;
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
- def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
}
}
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -511,6 +565,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
[]>;
+let isCodeGenOnly = 1 in {
+
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
@@ -524,6 +580,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
+}
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
@@ -531,6 +588,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -566,93 +639,90 @@ let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set GPRC:$dst,
- (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
- [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
- [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
- [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
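// The ATOMIC_* pseudos above are expanded after selection (usesCustomInserter)
// into load-reserve/store-conditional retry loops, roughly:
//   Lretry: lwarx  rT, 0, rPtr       ; load and reserve
//           <op>   rT2, rT, rIncr    ; compute the new value
//           stwcx. rT2, 0, rPtr      ; store iff the reservation still holds
//           bne-   Lretry            ; lost the reservation; try again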
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
- [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
- [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
@@ -666,94 +736,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load iaddr:$src))]>;
+ [(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
- [(set F4RC:$rD, (load iaddr:$src))]>;
+ [(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
- [(set F8RC:$rD, (load iaddr:$src))]>;
+ [(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -763,32 +833,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load xaddr:$src))]>;
+ [(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
- [(set F4RC:$frD, (load xaddr:$src))]>;
+ [(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
- [(set F8RC:$frD, (load xaddr:$src))]>;
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
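+// lfiwax sign-extends and lfiwzx zero-extends the loaded 32-bit word
+// into the 64-bit FPR, letting an i32 reach the FPU for fcfid-based
+// int-to-fp conversion. A sketch of the intended use:
+//   lfiwax f0, 0, r3    ; f0 = sext(mem[r3]) viewed as a 64-bit integer
+//   fcfid  f1, f0       ; f1 = (double) of that integer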
}
//===----------------------------------------------------------------------===//
@@ -799,137 +876,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(store GPRC:$rS, iaddr:$src)]>;
+ [(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
- [(store F4RC:$rS, iaddr:$dst)]>;
+ [(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
- [(store F8RC:$rS, iaddr:$dst)]>;
+ [(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
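+// For example, matching (pre_store i32:$x, iPTR:$p, iaddroff:16) would
+// produce "stwu $x, 16($p)" with the updated address left in $ea_res
+// (a sketch; the exact split follows the address-mode matching in ISel).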
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(store GPRC:$rS, xaddr:$dst)]>,
- PPC970_DGroup_Cracked;
-
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ [(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
- [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
- [(store F4RC:$frS, xaddr:$dst)]>;
+ [(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
- [(store F8RC:$frS, xaddr:$dst)]>;
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
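+// For example, (pre_store f64:$v, iPTR:$p, iPTR:$off) selects to
+// "stfdux $v, $p, $off", leaving $p + $off in $ea_res (a sketch).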
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -939,68 +1007,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
PPC970_DGroup_Cracked;
def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
- [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+ [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
- [(set GPRC:$rD, immSExt16:$imm)]>;
+ [(set i32:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
"lis $rD, $imm", IntSimple,
- [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
@@ -1013,38 +1079,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"srw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
let Defs = [CARRY] in {
def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"sraw $rA, $rS, $rB", IntShift,
- [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
}
@@ -1052,17 +1118,17 @@ let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CARRY] in {
def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
"srawi $rA, $rS, $SH", IntShift,
- [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
}
def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
- [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+ [(set i32:$rA, (ctlz i32:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
"cmpw $crD, $rA, $rB", IntCompare>;
@@ -1080,16 +1146,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
let Uses = [RM] in {
def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
"fctiwz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
"frsp $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ // The frin -> nearbyint mapping is valid only in fast-math mode.
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
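+  // (frin rounds ties away from zero, while fnearbyint honors the current
+  // rounding mode: e.g. frin(0.5) == 1.0 but nearbyint(0.5) == 0.0 under
+  // the default round-to-nearest-even, hence the fast-math restriction.)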
+
+ // These pseudos expand to rint but also set FE_INEXACT when the result does
+ // not equal the argument.
+ let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+ def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+ def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ }
+
+ def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
"fsqrt $frD, $frB", FPSqrt,
- [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
"fsqrts $frD, $frB", FPSqrt,
- [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1099,31 +1203,44 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
+ []>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+ [(set f32:$frD, (fabs f32:$frB))]>;
def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+ [(set f64:$frD, (fabs f64:$frB))]>;
def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+ [(set f32:$frD, (fneg f32:$frB))]>;
def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fre $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fres $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frsqrte $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frsqrtes $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
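+// The hardware estimates are low-precision; codegen refines them with
+// Newton-Raphson steps, e.g. for 1/d: x1 = x0 * (2.0 - d * x0) (a sketch;
+// the number of iterations depends on the precision required).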
}
-
// XL-Form instructions. condition register logical ops.
//
@@ -1141,6 +1258,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
"cror $CRD, $CRA, $CRB", BrCR,
[]>;
+let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
@@ -1158,6 +1276,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
+}
// XFX-Form instructions. Instructions that deal with SPRs.
//
@@ -1166,7 +1285,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1193,6 +1312,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're spilling the VRSAVE register,

+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1207,6 +1349,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
// instruction to keep the register allocator from becoming confused.
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1219,38 +1362,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-// Instructions to manipulate FPSCR. Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
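+// A rough sketch of that expansion (assumed, not verbatim):
+//   mffs   f<save>       ; save FPSCR
+//   mtfsb1 31            ; \ set the rounding-control field RN
+//   mtfsb0 30            ; / to 0b01 == round toward zero
+//   fadd   $FRT, $FRA, $FRB
+//   mtfsf  255, f<save>  ; restore FPSCR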
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0,
- [(PPCmtfsb0 (i32 imm:$FM))]>,
+ "mtfsb0 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0,
- [(PPCmtfsb1 (i32 imm:$FM))]>,
+ "mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- // MTFSF does not actually produce an FP result. We pretend it copies
- // input reg B to the output. If we didn't do this it would look like the
- // instruction had no outputs (because we aren't modelling the FPSCR) and
- // it would be deleted.
- def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
- (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
- "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
- [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
- F8RC:$rT, F8RC:$FRB))]>,
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ "mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
"mffs $rT", IntMFFS,
- [(set F8RC:$rT, (PPCmffs))]>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def FADDrtz: AForm_2<63, 21,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ [(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1261,61 +1395,61 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
}
def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divw $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divwu $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhwu $rT, $rA, $rB", IntMulHWU,
- [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mullw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
let Defs = [CARRY] in {
def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"neg $rT, $rA", IntSimple,
- [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+ [(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -1327,43 +1461,41 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
- (fneg F8RC:$FRB))))]>;
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
- (fneg F4RC:$FRB))))]>;
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1372,50 +1504,50 @@ let Uses = [RM] in {
def FSELD : AForm_1<63, 23,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FSELS : AForm_1<63, 23,
(outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
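+// fsel implements $FRT = ($FRA >= 0.0) ? $FRC : $FRB (with -0.0 counted
+// as >= 0 and NaN selecting $FRB), which is why only the comparison
+// operand $FRA needs the f64 form here.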
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fadds $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
def FDIV : AForm_2<63, 18,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fdiv $FRT, $FRA, $FRB", FPDivD,
- [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
def FDIVS : AForm_2<59, 18,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fdivs $FRT, $FRA, $FRB", FPDivS,
- [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
def FMUL : AForm_3<63, 25,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
"fmul $FRT, $FRA, $FRC", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
def FMULS : AForm_3<59, 25,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
"fmuls $FRT, $FRA, $FRC", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fsub $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fsubs $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
@@ -1455,47 +1587,43 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
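+// e.g. 0x12345678 becomes "lis rD, 0x1234; ori rD, rD, 0x5678", relying
+// on ori's immediate being zero-extended (a worked example).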
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not GPRC:$in),
- (NOR GPRC:$in, GPRC:$in)>;
+def NOT : Pat<(not i32:$in),
+ (NOR $in, $in)>;
// ADD an arbitrary immediate.
-def : Pat<(add GPRC:$in, imm:$imm),
- (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
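+// HA16 compensates for ADDI sign-extending LO16: for imm = 0x18000,
+// LO16 = 0x8000 (-32768 once sign-extended) and HA16 = 0x2, so
+// ($in - 0x8000) + (0x2 << 16) == $in + 0x18000 (a worked example).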
// OR an arbitrary immediate.
-def : Pat<(or GPRC:$in, imm:$imm),
- (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// XOR an arbitrary immediate.
-def : Pat<(xor GPRC:$in, imm:$imm),
- (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// SUBFIC
-def : Pat<(sub immSExt16:$imm, GPRC:$in),
- (SUBFIC GPRC:$in, imm:$imm)>;
+def : Pat<(sub immSExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
// SHL/SRL
-def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
-def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
// ROTL
-def : Pat<(rotl GPRC:$in, GPRC:$sh),
- (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
-def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
// RLWNM
-def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
- (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
-def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
- (BL_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
- (BL_Darwin texternalsym:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
- (BL_SVR4 tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
- (BL_SVR4 texternalsym:$dst)>;
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
@@ -1518,28 +1646,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
- (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
- (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS GPRC:$in, tglobaladdr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS GPRC:$in, tconstpool:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS GPRC:$in, tjumptable:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS GPRC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
-def : Pat<(sra GPRC:$rS, GPRC:$rB),
- (SRAW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(srl GPRC:$rS, GPRC:$rB),
- (SRW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(shl GPRC:$rS, GPRC:$rB),
- (SLW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
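+// These forms take the full 6-bit amount: "slw rD, rS, rB" with rB == 32
+// yields 0, while the rlwinm-based immediate shifts above only accept
+// 5-bit (0-31) amounts.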
def : Pat<(zextloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
@@ -1562,8 +1690,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fextend F4RC:$src)),
- (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
@@ -1575,5 +1703,15 @@ def : Pat<(membarrier (i32 imm /*ll*/),
def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index aba27399d6da..cfcd7490ed0d 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -15,10 +15,10 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Memory.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -115,7 +115,7 @@ asm(
"lwz r2, 208(r1)\n" // stub's frame
"lwz r4, 8(r2)\n" // stub's lr
"li r5, 0\n" // 0 == 32 bit
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
"mtctr r3\n"
// Restore all int arg registers
"lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
@@ -178,7 +178,7 @@ asm(
"lwz 5, 104(1)\n" // stub's frame
"lwz 4, 4(5)\n" // stub's lr
"li 5, 0\n" // 0 == 32 bit
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"mtctr 3\n"
// Restore all int arg registers
"lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
@@ -259,10 +259,10 @@ asm(
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
#ifdef __ELF__
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"nop\n"
#else
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
#endif
"mtctr 3\n"
// Restore all int arg registers
@@ -292,9 +292,10 @@ void PPC64CompilationCallback() {
#endif
extern "C" {
-static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2f8243a597e6..46d4a08eb687 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -14,8 +14,8 @@
#ifndef POWERPC_JITINFO_H
#define POWERPC_JITINFO_H
-#include "llvm/Target/TargetJITInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
class PPCTargetMachine;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 19ec993ba00f..9b0df3e86a75 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 24caffa3f0f2..ee18eadf6e5f 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -71,11 +81,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR;
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
+ SpillsVRSAVE(false),
LRStoreRequired(false),
MinReservedArea(0),
TailCallSPDelta(0),
@@ -83,7 +99,8 @@ public:
VarArgsFrameIndex(0),
VarArgsStackOffset(0),
VarArgsNumGPR(0),
- VarArgsNumFPR(0) {}
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -105,9 +122,18 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
@@ -125,6 +151,9 @@ public:
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3f6..1d61a3a8eac2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -15,63 +15,45 @@
#define DEBUG_TYPE "reginfo"
#include "PPCRegisterInfo.h"
#include "PPC.h"
+#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
- ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -111,11 +93,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
- // For 32-bit SVR4, also initialize the frame index associated with
- // the CR spill slot.
- if (!Subtarget.isPPC64())
- CRSpillFrameIdx = 0;
-
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@@ -128,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // The naming here is inverted: The CSR_NoRegs_Altivec has the
+ // Altivec registers masked so that they're not saved and restored around
+ // instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ if (Subtarget.isDarwin())
+ return CSR_NoRegs_Darwin_RegMask;
+ return CSR_NoRegs_RegMask;
+}
+
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
- Reserved.set(PPC::R0);
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
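+  // (For example, in the D-form load "lwz rD, d(rA)" an RA field of 0
+  // means the literal value 0 rather than the contents of GPR r0.)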
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
Reserved.set(PPC::LR8);
@@ -144,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
- Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -190,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -204,77 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-void PPCRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- I->getOpcode() == PPC::ADJCALLSTACKUP) {
- // Add (actually subtract) back the amount the callee popped on return.
- if (int CalleeAmt = I->getOperand(1).getImm()) {
- bool is64Bit = Subtarget.isPPC64();
- CalleeAmt *= -1;
- unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
- unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
-
- if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addImm(CalleeAmt);
- } else {
- MachineBasicBlock::iterator MBBI = I;
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CalleeAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addReg(TmpReg);
- }
- }
- }
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code will be in the general form
///
@@ -282,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
/// stwux R0, SP, Rnegsize ; add and update the SP with the negated size
/// addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
///
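+/// A hypothetical instance (constants invented for illustration), assuming a
+/// 16-byte maximum call frame on a 32-bit target:
+///
+///   lwz   R0, 0(SP)          ; fetch the old back-chain (stack link)
+///   stwux R0, SP, Rnegsize   ; grow the stack, re-storing the link
+///   addi  Rnew, SP, 16       ; Rnew = top of the newly allocated space
+///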
-void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the instruction.
MachineInstr &MI = *II;
// Get the instruction's basic block.
@@ -315,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -346,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -398,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
/// stw rA, FI ; Store rA to the frame.
///
void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -424,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- if (SrcReg != PPC::CR0)
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
.addImm(0)
.addImm(31);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
- .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ .addReg(Reg, RegState::Kill),
FrameIndex);
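+
+  // A concrete sketch (virtual-register names invented): spilling CR2 on a
+  // 32-bit target emits
+  //   mfcr   %v0                 ; copy all eight CR fields into a GPR
+  //   rlwinm %v1, %v0, 8, 0, 31  ; enc(CR2) * 4 == 8: rotate CR2 into CR0's slot
+  //   stw    %v1, <FrameIndex>   ; store the rotated value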
// Discard the pseudo instruction.
@@ -441,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
}
void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -468,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
.addImm(31);
}
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(Reg);
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
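+
+// A concrete sketch of the restore above for CR2 (names invented):
+//   lwz    %v0, <FrameIndex>
+//   rlwinm %v1, %v0, 24, 0, 31   ; 32 - enc(CR2)*4 == 24: rotate back
+//   mtcrf  0x20, %v1             ; move the low bits into field CR2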
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -489,18 +438,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we must
- // create exactly one stack slot and return its FrameIdx for all
- // nonvolatiles.
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64())
FrameIdx = 0;
- } else if (CRSpillFrameIdx) {
- FrameIdx = CRSpillFrameIdx;
- } else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
- FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
- CRSpillFrameIdx = FrameIdx;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
}
return true;
}
@@ -509,7 +454,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -523,20 +469,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Find out which operand is the frame index.
- unsigned FIOperandNo = 0;
- while (!MI.getOperand(FIOperandNo).isFI()) {
- ++FIOperandNo;
- assert(FIOperandNo != MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
// Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNo-1;
+ OffsetOperandNo = FIOperandNum-1;
// Get the frame index.
- int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Get the frame pointer save index. Users of this index are primarily
// DYNALLOC instructions.
@@ -548,25 +487,29 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
- lowerDynamicAlloc(II, SPAdj, RS);
+ lowerDynamicAlloc(II);
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
}
// Replace the FrameIndex with the base register, GPR1 (SP) or GPR31 (FP).
bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
(is64Bit ? PPC::X31 : PPC::R31) :
(is64Bit ? PPC::X1 : PPC::R1),
false);
@@ -579,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::LWA:
case PPC::LD:
case PPC::STD:
- case PPC::STD_32:
isIXAddr = true;
break;
}
-
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
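+  // For example (an assumption about the map's contents): Altivec LVX/STVX
+  // exist only in r+r form, so they never appear in ImmToIdxMap and always
+  // take the register-offset path below, even when the offset would fit.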
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -596,7 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
Offset += MFI->getStackSize();
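+  // Illustrative numbers (invented): an object at offset -8 within a
+  // 128-byte frame yields Offset = -8 + 128 = 120 from r1.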
// If we can, encode the offset directly into the instruction. If this is a
@@ -606,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -616,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
- .addReg(SReg, RegState::Kill)
+ .addReg(SRegHi, RegState::Kill)
.addImm(Offset);
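+  // Sketch with an invented offset of 0x12345 (too large for 16 bits):
+  //   lis %hi, 1           ; Offset >> 16
+  //   ori %lo, %hi, 0x2345 ; the encoder keeps only the low 16 bits of Offset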
// Convert into indexed form of the instruction:
@@ -637,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -647,7 +595,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index a8fd796d9e97..7e6683eeb2ef 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
-#include <map>
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -27,10 +27,9 @@ class TargetInstrInfo;
class Type;
class PPCRegisterInfo : public PPCGenRegisterInfo {
- std::map<unsigned, unsigned> ImmToIdxMap;
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
- mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -45,31 +44,38 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 5ca387629b6c..57a25f5143fa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -27,178 +27,72 @@ class PPCReg<string n> : Register<n> {
// GPR - One of the 32 32-bit general-purpose registers
class GPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// GP8 - One of the 32 64-bit general-purpose registers
class GP8<GPR SubReg, string n> : PPCReg<n> {
- field bits<5> Num = SubReg.Num;
+ let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
- field bits<10> Num = num;
+ let HWEncoding{9-0} = num;
}
// FPR - One of the 32 64-bit floating-point registers
class FPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// VR - One of the 32 128-bit vector registers
class VR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- field bits<3> Num = num;
+ let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// CRBIT - One of the 32 1-bit condition register fields
class CRBIT<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
-
// General-purpose registers
-def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
-def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
-def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
-def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
-def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
-def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
-def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
-def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
-def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
-def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
-def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
-def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
-def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
-def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
-def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
-def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
-def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
-def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
-def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
-def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
-def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
-def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
-def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
-def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
-def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
-def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
-def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
-def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
-def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
-def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
-def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
-def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
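+
+// For reference, the loop expands to the same records the explicit list used
+// to spell out:
+//   def R0  : GPR<0, "r0">, DwarfRegNum<[-2, 0]>;
+//   ...
+//   def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;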
// 64-bit General-purpose registers
-def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
-def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
-def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
-def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
-def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
-def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
-def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
-def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
-def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
-def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
-def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
-def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
-def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
-def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
-def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
-def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
-def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
-def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
-def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
-def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
-def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
-def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
-def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
-def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
-def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
-def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
-def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
-def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
-def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
-def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
-def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
-def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
// Floating-point registers
-def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
-def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
-def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
-def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
-def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
-def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
-def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
-def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
-def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
-def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
-def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
-def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
-def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
-def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
-def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
-def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
-def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
-def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
-def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
-def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
-def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
-def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
-def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
-def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
-def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
-def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
-def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
-def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
-def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
-def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
-def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
-def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
// Vector registers
-def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
-def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
-def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
-def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
-def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
-def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
-def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
-def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
-def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
-def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
-def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
-def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
-def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
-def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
-def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
-def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
-def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
-def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
-def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
-def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
-def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
-def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
-def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
-def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
-def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
-def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
-def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
-def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
-def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
-def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
-def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
-def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
@@ -278,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
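+
+// For instance, in "addi rD, rA, imm" the hardware reads rA == r0 as the
+// constant 0 rather than the register, so ADDI-like instructions draw their
+// base operand from GPRC_NOR0/G8RC_NOX0 and use ZERO/ZERO8 when a literal
+// zero is actually intended.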
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index ba63b5cd8faf..ae084aa0e8c1 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -749,3 +749,18 @@ def PPCA2Itineraries : ProcessorItineraries<
[15, 7],
[FPR_Bypass, FPR_Bypass]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+  let IssueWidth = 1;  // 1 instruction is dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+ let MispredictPenalty = 6;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 7c02ea099c14..c64998d52a0c 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -92,3 +92,18 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 9c8cb92cc7ea..a8f2b3f47d1b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
-#include "PPCRegisterInfo.h"
#include "PPC.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -36,9 +36,20 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
+ , HasQPX(false)
, HasFSQRT(false)
+ , HasFRE(false)
+ , HasFRES(false)
+ , HasFRSQRTE(false)
+ , HasFRSQRTES(false)
+ , HasRecipPrec(false)
, HasSTFIWX(false)
+ , HasLFIWAX(false)
+ , HasFPRND(false)
+ , HasFPCVT(false)
, HasISEL(false)
+ , HasPOPCNTD(false)
+ , HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
@@ -82,6 +93,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b9e22f43c39e..65b4d211fc6a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,9 +14,9 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
DIR_PWR6,
+ DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,9 +75,17 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+ bool HasQPX;
bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -148,10 +161,21 @@ public:
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -160,6 +184,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3fc977ee2b41..fe851c1b6fb8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,13 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
-#include "llvm/PassManager.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
@@ -127,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // First add the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c168433a71b3..606ccb314126 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -15,14 +15,13 @@
#define PPC_TARGETMACHINE_H
#include "PPCFrameLowering.h"
-#include "PPCSubtarget.h"
-#include "PPCJITInfo.h"
-#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,17 +63,14 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2504ba70c25a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
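+
+// A sketch of the reasoning above: on the A2, a loop whose body is a single
+// 6-cycle fadd is latency-bound; unrolling it 6x keeps six independent adds
+// in flight, roughly hiding the pipeline latency.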
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
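+
+// Worked instance of the formula above (operands invented): a <4 x i32>
+// store (16 bytes) with 4-byte alignment costs LT.first * (16/4), i.e. four
+// times the aligned cost.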
+
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index b6763aa73802..cc2ff966332e 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -1,7 +1,6 @@
//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
TODO:
-* gpr0 allocation
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
@@ -204,12 +203,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
===-------------------------------------------------------------------------===
-Implement Newton-Rhapson method for improving estimate instructions to the
-correct accuracy, and implementing divide as multiply by reciprocal when it has
-more than one use. Itanium would want this too.
-
-===-------------------------------------------------------------------------===
-
Compile offsets from allocas:
int *%test() {
@@ -536,20 +529,6 @@ void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
===-------------------------------------------------------------------------===
-Complete the signed i32 to FP conversion code using 64-bit registers
-transformation, good for PI. See PPCISelLowering.cpp, this comment:
-
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-Also, if the registers are spilled to the stack, we have to ensure that all
-64-bits of them are save/restored, otherwise we will miscompile the code. It
-sounds like we need to get the 64-bit register classes going.
-
-===-------------------------------------------------------------------------===
-
%struct.B = type { i8, [3 x i8] }
define void @bar(%struct.B* %b) {
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568d83f2..fa44331b8af6 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
new file mode 100644
index 000000000000..0b01433cc926
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.h
@@ -0,0 +1,51 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+
+// SI Passes
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
+
+// Passes common to R600 and SI
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+namespace ShaderType {
+ enum Type {
+ PIXEL = 0,
+ VERTEX = 1,
+ GEOMETRY = 2,
+ COMPUTE = 3
+ };
+}
+
+#endif // AMDGPU_H
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
new file mode 100644
index 000000000000..1a26c77d6bb2
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.td
@@ -0,0 +1,41 @@
+//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+
+
+def AMDGPUInstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDGPUAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyWriters = [AMDGPUAsmWriter];
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
new file mode 100644
index 000000000000..f6001445f4b3
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,145 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to print both the assembly string and the
+/// binary code. When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+
+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ return new AMDGPUAsmPrinter(tm, Streamer);
+}
+
+extern "C" void LLVMInitializeR600AsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+}
+
+/// We need to override this function so we can avoid
+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+ }
+ SetupMachineFunction(MF);
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText("@" + MF.getName() + ":");
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ EmitProgramInfo(MF);
+ }
+ EmitFunctionBody();
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ switch (reg) {
+ default: break;
+ case AMDGPU::EXEC:
+ case AMDGPU::M0:
+ continue;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
+ } else {
+ assert(!"Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg) & 0xff;
+ maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ MaxSGPR += 2;
+ }
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+}
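+
+// Counting sketch (register numbers invented): if the widest SGPR use is the
+// 64-bit pair starting at hardware register 8, then hwReg == 8 and width == 2,
+// so MaxSGPR becomes 9; a use of VCC adds 2 more, and the streamer emits
+// MaxSGPR + 1 == 12 as the allocated SGPR count.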
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
new file mode 100644
index 000000000000..3812282b1798
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -0,0 +1,44 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_ASMPRINTER_H
+#define AMDGPU_ASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// \brief Emit register usage information so that the GPU driver
+  /// can correctly set up the GPU state.
+ void EmitProgramInfo(MachineFunction &MF);
+
+ /// Implemented in AMDGPUMCInstLower.cpp
+ virtual void EmitInstruction(const MachineInstr *MI);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_ASMPRINTER_H
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
new file mode 100644
index 000000000000..45ae37ef0c7f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -0,0 +1,42 @@
+//===-- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+// Inversion of CCIfInReg
+class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+
+// Calling convention for SI
+def CC_SI : CallingConv<[
+
+ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ ]>>>,
+
+ CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+  [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>
+
+]>;
+
+def CC_AMDGPU : CallingConv<[
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
+]>;
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
new file mode 100644
index 000000000000..50297d1f60c8
--- /dev/null
+++ b/lib/Target/R600/AMDGPUConvertToISA.cpp
@@ -0,0 +1,62 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers AMDIL machine instructions to the appropriate
+/// hardware instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
new file mode 100644
index 000000000000..815d6f71c3be
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+ // XXX: Hardcoding to 1 for now.
+ //
+  // I think the StackWidth should be stored as metadata associated with the
+ // MachineFunction. This metadata can either be added by a frontend, or
+ // calculated by a R600 specific LLVM IR pass.
+ //
+ // The StackWidth determines how stack objects are laid out in memory.
+ // For a vector stack variable, like: int4 stack[2], the data will be stored
+ // in the following ways depending on the StackWidth.
+ //
+ // StackWidth = 1:
+ //
+ // T0.X = stack[0].x
+ // T1.X = stack[0].y
+ // T2.X = stack[0].z
+ // T3.X = stack[0].w
+ // T4.X = stack[1].x
+ // T5.X = stack[1].y
+ // T6.X = stack[1].z
+ // T7.X = stack[1].w
+ //
+ // StackWidth = 2:
+ //
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T1.X = stack[0].z
+ // T1.Y = stack[0].w
+ // T2.X = stack[1].x
+ // T2.Y = stack[1].y
+ // T3.X = stack[1].z
+ // T3.Y = stack[1].w
+ //
+ // StackWidth = 4:
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T0.Z = stack[0].z
+ // T0.W = stack[0].w
+ // T1.X = stack[1].x
+ // T1.Y = stack[1].y
+ // T1.Z = stack[1].z
+ // T1.W = stack[1].w
+ return 1;
+}
+
+/// \returns The offset (in registers) of the object at frame index \p FI.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Offset = 0;
+ int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+ for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+ const AllocaInst *Alloca = MFI->getObjectAllocation(i);
+ unsigned ArrayElements;
+ const Type *AllocaType = Alloca->getAllocatedType();
+ const Type *ElementType;
+
+ if (AllocaType->isArrayTy()) {
+ ArrayElements = AllocaType->getArrayNumElements();
+ ElementType = AllocaType->getArrayElementType();
+ } else {
+ ArrayElements = 1;
+ ElementType = AllocaType;
+ }
+
+ unsigned VectorElements;
+ if (ElementType->isVectorTy()) {
+ VectorElements = ElementType->getVectorNumElements();
+ } else {
+ VectorElements = 1;
+ }
+
+ Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
+ }
+ return Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
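
As a worked example of getFrameIndexOffset above: with StackWidth = 1, an int4 stack[2] alloca occupies (4 / 1) * 2 = 8 registers, so the frame object that follows it starts at register offset 8; with StackWidth = 4, the same alloca would occupy (4 / 4) * 2 = 2. The offset is a register count, not a byte count, which is why the loop sums element counts rather than sizes.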
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
new file mode 100644
index 000000000000..cf5742ee0952
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -0,0 +1,44 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe the layout of a stack frame on an AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILFRAME_LOWERING_H
+#define AMDILFRAME_LOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+
+ /// \returns The number of 32-bit sub-registers that are used when storing
+ /// values to the stack.
+ virtual unsigned getStackWidth(const MachineFunction &MF) const;
+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // AMDILFRAME_LOWERING_H
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
new file mode 100644
index 000000000000..a266df535d56
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -0,0 +1,414 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This is the parent TargetLowering class for hardware code gen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+#include "AMDGPUGenCallingConv.inc"
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Initialize target lowering borrowed from AMDIL
+ InitAMDILLowering();
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ // Lower floating point store/load to integer store/load to reduce the number
+ // of patterns in tablegen.
+ setOperationAction(ISD::STORE, MVT::f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
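
The Promote entries above mean a single i32/v4i32 pattern serves the f32/v4f32 forms as well: during legalization the stored or loaded value is simply bitcast to the promoted integer type, so tablegen never needs separate floating-point load/store patterns.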
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+ State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ // AMDIL DAG lowering
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ // AMDGPU DAG lowering
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+ return Op;
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ }
+}
+
+/// IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
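
A quick check of the IABS expansion: for a = -5, SMAX(0 - (-5), -5) = SMAX(5, -5) = 5, and for non-negative a, SMAX(-a, a) = a. As with the usual two's-complement abs, INT_MIN maps to itself, since 0 - INT_MIN wraps back to INT_MIN.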
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
+}
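
A quick numeric check of the LRP expansion: LRP(0.25, 8.0, 4.0) = 0.25 * 8.0 + (1 - 0.25) * 4.0 = 2.0 + 3.0 = 5.0, a quarter of the way from c = 4.0 to b = 8.0.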
+
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+
+ if (VT != MVT::f32 ||
+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2:
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2:
+ case ISD::SETUO:
+ case ISD::SETO:
+ assert(0 && "Operation should already be optimised !");
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETUGT:
+ case ISD::SETOGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ }
+ case ISD::SETCC_INVALID:
+ assert(0 && "Invalid setcc condcode !");
+ }
+ return Op;
+}
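
For example, select_cc(a, b, a, b, setolt), which reads "a if a < b, else b", folds to FMIN(a, b) via the SETOLT case above; with the true/false operands swapped, select_cc(a, b, b, a, setolt) folds to FMAX(a, b).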
+
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+  // RCP_LO = umulo(RCP, Den)
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+  // RCP_HI = mulhu(RCP, Den)
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+ SDValue Ops[2];
+ Ops[0] = Div;
+ Ops[1] = Rem;
+ return DAG.getMergeValues(Ops, 2, DL);
+}
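
The expansion above is easiest to sanity-check with plain host arithmetic. The sketch below mirrors it step for step, assuming an idealised URECIP of floor(2^32 / Den) (the hardware value differs by a small error e, which the E/Tmp0 corrections absorb) and Den != 0; note that both fix-up compares are signed, matching the ISD::SETGE nodes used above. Names here are illustrative, not part of the patch.

    #include <cstdint>

    // Host-side model of the UDIVREM expansion. Assumes Den != 0.
    static void modelUDivRem32(uint32_t Num, uint32_t Den,
                               uint32_t &Div, uint32_t &Rem) {
      uint32_t RCP = (uint32_t)(0x100000000ULL / Den);      // URECIP(Den)
      uint64_t W = (uint64_t)RCP * Den;
      uint32_t RCP_LO = (uint32_t)W;                        // low half (umulo)
      uint32_t RCP_HI = (uint32_t)(W >> 32);                // high half (mulhu)
      uint32_t ABS_RCP_LO = (RCP_HI == 0) ? 0u - RCP_LO : RCP_LO;
      uint32_t E = (uint32_t)(((uint64_t)ABS_RCP_LO * RCP) >> 32);
      uint32_t Tmp0 = (RCP_HI == 0) ? RCP + E : RCP - E;
      uint32_t Quotient = (uint32_t)(((uint64_t)Tmp0 * Num) >> 32);
      uint32_t Remainder = Num - Quotient * Den;
      bool GE_Den  = (int32_t)Remainder >= (int32_t)Den;    // signed, as SETGE
      bool GE_Zero = (int32_t)Remainder >= 0;               // signed, as SETGE
      // Quotient is within +/-1 of the true result; nudge it and the
      // remainder in the direction the two flags indicate.
      Div = (GE_Den && GE_Zero) ? Quotient + 1 : Quotient;
      if (!GE_Zero) Div = Quotient - 1;
      Rem = (GE_Den && GE_Zero) ? Remainder - Den : Remainder;
      if (!GE_Zero) Rem = Remainder + Den;
    }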
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ // AMDIL DAG nodes
+  NODE_NAME_CASE(CALL)
+  NODE_NAME_CASE(UMUL)
+  NODE_NAME_CASE(DIV_INF)
+  NODE_NAME_CASE(RET_FLAG)
+  NODE_NAME_CASE(BRANCH_COND)
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(CONST_ADDRESS)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
+ }
+}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
new file mode 100644
index 000000000000..f31b6466bd46
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -0,0 +1,140 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition of the TargetLowering class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering {
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+ virtual SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ CLI.Callee.dump();
+ llvm_unreachable("Undefined function");
+ }
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const {
+ return N;
+ }
+
+// Functions defined in AMDILISelLowering.cpp
+public:
+
+ /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
+ /// bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+
+ /// We want to mark f32/f64 floating point values as legal.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// We don't want to shrink f64/f32 constants.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+private:
+ void InitAMDILLowering();
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+};
+
+namespace AMDGPUISD {
+
+enum {
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // Function call based on a single integer
+  UMUL,        // 32-bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ BITALIGN,
+ DWORDADDR,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ EXPORT,
+ CONST_ADDRESS,
+ REGISTER_LOAD,
+ REGISTER_STORE,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
new file mode 100644
index 000000000000..ed6c8ec55dd2
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,343 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const AMDGPUInstrInfo *TII;
+
+ bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+ AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+ return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ int IndirectBegin = TII->getIndirectIndexBegin(MF);
+ int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+ if (IndirectBegin == -1) {
+ // No indirect addressing, we can skip this pass
+ assert(IndirectEnd == -1);
+ return false;
+ }
+
+ // The map keeps track of the indirect address that is represented by
+ // each virtual register. The key is the register and the value is the
+ // indirect address it uses.
+ std::map<unsigned, unsigned> RegisterAddressMap;
+
+ // First pass - Lower all of the RegisterStore instructions and track which
+ // registers are live.
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // This map keeps track of the current live indirect registers.
+ // The key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterStore(MI)) {
+ continue;
+ }
+
+ // Lower RegisterStore
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+ const TargetRegisterClass *IndirectStoreRegClass =
+ TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access.
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+ .addOperand(MI.getOperand(0));
+
+ RegisterAddressMap[DstReg] = Address;
+ LiveAddressRegisterMap[Address] = DstReg;
+ } else {
+ // Indirect register access.
+ MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+ MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+ RegisterAddressMap[DstReg] = Addr;
+ LiveAddressRegisterMap[Addr] = DstReg;
+ }
+ }
+ MI.eraseFromParent();
+ }
+
+    // Update the live-ins of the successor blocks
+ for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+ SuccEnd = MBB.succ_end();
+ SuccEnd != Succ; ++Succ) {
+ std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+ for (Key = LiveAddressRegisterMap.begin(),
+ KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+ (*Succ)->addLiveIn(Key->second);
+ }
+ }
+ }
+
+ // Second pass - Lower the RegisterLoad instructions
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // Key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+ while (LI != MBB.livein_end()) {
+ std::vector<unsigned> PhiRegisters;
+
+ // Make sure this live in is used for indirect addressing
+ if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+ ++LI;
+ continue;
+ }
+
+ unsigned Address = RegisterAddressMap[*LI];
+ LiveAddressRegisterMap[Address] = *LI;
+ PhiRegisters.push_back(*LI);
+
+ // Check if there are other live in registers which map to the same
+ // indirect address.
+ for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+ LE = MBB.livein_end();
+ LJ != LE; ++LJ) {
+ unsigned Reg = *LJ;
+ if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+ continue;
+ }
+
+ if (RegisterAddressMap[Reg] == Address) {
+ PhiRegisters.push_back(Reg);
+ }
+ }
+
+ if (PhiRegisters.size() == 1) {
+ // We don't need to insert a Phi instruction, so we can just add the
+ // registers to the live list for the block.
+ LiveAddressRegisterMap[Address] = *LI;
+ MBB.removeLiveIn(*LI);
+ } else {
+ // We need to insert a PHI, because we have the same address being
+ // written in multiple predecessor blocks.
+ const TargetRegisterClass *PhiDstClass =
+ TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+ unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+ MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+ MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::PHI), PhiDstReg);
+
+ for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+ RE = PhiRegisters.end();
+ RI != RE; ++RI) {
+ unsigned Reg = *RI;
+ MachineInstr *DefInst = MRI.getVRegDef(Reg);
+ assert(DefInst);
+ MachineBasicBlock *RegBlock = DefInst->getParent();
+ Phi.addReg(Reg);
+ Phi.addMBB(RegBlock);
+ MBB.removeLiveIn(Reg);
+ }
+ RegisterAddressMap[PhiDstReg] = Address;
+ LiveAddressRegisterMap[Address] = PhiDstReg;
+ }
+ LI = MBB.livein_begin();
+ }
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterLoad(MI)) {
+ if (MI.getOpcode() == AMDGPU::PHI) {
+ continue;
+ }
+ // Check for indirect register defs
+ for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+ OpIdx < NumOperands; ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isReg() && MO.isDef() &&
+ RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+ unsigned Reg = MO.getReg();
+ unsigned LiveAddress = RegisterAddressMap[Reg];
+ // Chain the live-ins
+          if (LiveAddressRegisterMap.find(LiveAddress) !=
+              LiveAddressRegisterMap.end()) {
+ MI.addOperand(MachineOperand::CreateReg(
+ LiveAddressRegisterMap[LiveAddress],
+ false, // isDef
+ true, // isImp
+ true)); // isKill
+ }
+ LiveAddressRegisterMap[LiveAddress] = Reg;
+ }
+ }
+ continue;
+ }
+
+ const TargetRegisterClass *SuperIndirectRegClass =
+ TII->getSuperIndirectRegClass();
+ const TargetRegisterClass *IndirectLoadRegClass =
+ TII->getIndirectAddrLoadRegClass();
+ unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access
+ unsigned Reg = LiveAddressRegisterMap[Address];
+ unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+ if (regHasExplicitDef(MRI, Reg)) {
+ // If the register we are reading from has an explicit def, then that
+ // means it was written via a direct register access (i.e. COPY
+ // or other instruction that doesn't use indirect addressing). In
+ // this case we know where the value has been stored, so we can just
+ // issue a copy.
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(Reg);
+ } else {
+ // If the register we are reading has an implicit def, then that
+          // means it was written by an indirect register access (i.e., an
+          // instruction that uses indirect addressing).
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(AddrReg)
+ .addReg(Reg, RegState::Implicit);
+ }
+ } else {
+ // Indirect register access
+
+        // Note on REG_SEQUENCE instructions: You can't actually use the register
+ // it defines unless you have an instruction that takes the defined
+ // register class as an operand.
+
+ MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::REG_SEQUENCE),
+ IndirectReg);
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+ continue;
+ }
+ unsigned Reg = LiveAddressRegisterMap[Addr];
+
+ // We only need to use REG_SEQUENCE for explicit defs, since the
+ // register coalescer won't do anything with the implicit defs.
+ if (!regHasExplicitDef(MRI, Reg)) {
+ continue;
+ }
+
+          // Insert a REG_SEQUENCE instruction to force the register allocator
+ // to allocate the virtual register to the correct physical register.
+ Sequence.addReg(LiveAddressRegisterMap[Addr]);
+ Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+ }
+ MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+
+
+
+ Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+ Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
+
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+
+ if (!DefInstr) {
+ return false;
+ }
+
+ if (DefInstr->getOpcode() == AMDGPU::PHI) {
+ bool Explicit = false;
+ for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
+ E = DefInstr->operands_end();
+ I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef()) {
+ continue;
+ }
+
+ Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
+ }
+ return Explicit;
+ }
+
+ return DefInstr->getOperand(0).isReg() &&
+ DefInstr->getOperand(0).getReg() == Reg;
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
new file mode 100644
index 000000000000..30f736c84c25
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -0,0 +1,267 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implementation of the TargetInstrInfo class that is common to all
+/// AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "AMDGPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ case AMDGPU::BRANCH:
+ return true;
+ };
+ ++iter;
+ }
+ return false;
+}
+
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
+ || tmp->getOpcode() == AMDGPU::ENDIF
+ || tmp->getOpcode() == AMDGPU::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+  // If we have fewer than 16 loads in a row, and the offsets are within 16,
+  // then schedule together.
+  // TODO: Make the loads schedule near if they fit in a cache line.
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
new file mode 100644
index 000000000000..3909e4e105ee
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -0,0 +1,206 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Contains the definition of a TargetInstrInfo class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H
+#define AMDGPUINSTRUCTIONINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+protected:
+ TargetMachine &TM;
+public:
+ explicit AMDGPUInstrInfo(TargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const = 0;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+ bool isRegisterStore(const MachineInstr &MI) const;
+ bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual functions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
+
+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const = 0;
+ virtual unsigned getIEQOpcode() const = 0;
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+ /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+ /// \p Channel
+ ///
+ /// We model indirect addressing using a virtual address space that can be
+  /// accessed with loads and stores. The "Indirect Address" is the memory
+ /// address in this virtual address space that maps to the given \p RegIndex
+ /// and \p Channel.
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const = 0;
+
+ /// \returns The register class to be used for storing values to an
+ /// "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const = 0;
+
+ /// \returns The register class to be used for loading values from
+ /// an "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+ /// \brief Build instruction(s) for an indirect register write.
+ ///
+ /// \returns The instruction that performs the indirect register write
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build instruction(s) for an indirect register read.
+ ///
+ /// \returns The instruction that performs the indirect register read
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \returns the register class whose sub registers are the set of all
+ /// possible registers that can be used for indirect addressing.
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
+
+
+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
+#endif // AMDGPUINSTRUCTIONINFO_H
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
new file mode 100644
index 000000000000..b66ae879dc20
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -0,0 +1,82 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c
+//
+// Can be used to optimize rotl:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
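
A scalar reading of the BITALIGN node and the rotl rewrite noted above, as a host-side sketch (hypothetical helpers; assumes 0 <= b < 32 so no shift amount reaches 64):

    #include <cstdint>

    // out = ((a << 32) | b) >> c, truncated back to 32 bits
    static uint32_t bitalign(uint32_t a, uint32_t b, uint32_t c) {
      return (uint32_t)((((uint64_t)a << 32) | b) >> c);
    }

    // rotl(a, b) = bitalign(a, a, 32 - b) for 0 <= b < 32
    static uint32_t rotl(uint32_t a, uint32_t b) {
      return bitalign(a, a, 32 - b);
    }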
+
+// The argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division; it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// where e is the rounding error
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+
+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
new file mode 100644
index 000000000000..e740348717c7
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -0,0 +1,266 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bit isRegisterLoad = 0;
+ field bit isRegisterStore = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+
+ let TSFlags{63} = isRegisterLoad;
+ let TSFlags{62} = isRegisterStore;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOEQ: case ISD::SETUEQ:
+ case ISD::SETEQ: return true;}}}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETONE: case ISD::SETUNE:
+ case ISD::SETNE: return true;}}}]
+>;
+def COND_GT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGT: case ISD::SETUGT:
+ case ISD::SETGT: return true;}}}]
+>;
+
+def COND_GE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGE: case ISD::SETUGE:
+ case ISD::SETGE: return true;}}}]
+>;
+
+def COND_LT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLT: case ISD::SETULT:
+ case ISD::SETLT: return true;}}}]
+>;
+
+def COND_LE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLE: case ISD::SETULE:
+ case ISD::SETLE: return true;}}}]
+>;
+
+def COND_NULL : PatLeaf <
+ (cond),
+ [{return false;}]
+>;
+
+//===----------------------------------------------------------------------===//
+// Load/Store Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+class Constants {
+  int TWO_PI = 0x40c90fdb;
+  int PI = 0x40490fdb;
+  int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+ ComplexPattern addrPat> {
+ def RegisterLoad : AMDGPUShaderInst <
+ (outs dstClass:$dst),
+ (ins addrClass:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
+ (i32 timm:$chan)))]
+ > {
+ let isRegisterLoad = 1;
+ }
+
+ def RegisterStore : AMDGPUShaderInst <
+ (outs),
+ (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+ "RegisterStore $val, $addr",
+ [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+ > {
+ let isRegisterStore = 1;
+ }
+}
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (fpow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
+
+// Vector Build pattern
+class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$src))),
+ (vecType elemClass:$src)
+>;
+
+class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+>;
+
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
+ (elemType elemClass:$z), (elemType elemClass:$w))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
+ elemClass:$z, sub2), elemClass:$w, sub3)
+>;
+
+class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+>;
+
+class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7),
+ (elemType elemClass:$sub8), (elemType elemClass:$sub9),
+ (elemType elemClass:$sub10), (elemType elemClass:$sub11),
+ (elemType elemClass:$sub12), (elemType elemClass:$sub13),
+ (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7),
+ elemClass:$sub8, sub8), elemClass:$sub9, sub9),
+ elemClass:$sub10, sub10), elemClass:$sub11, sub11),
+ elemClass:$sub12, sub12), elemClass:$sub13, sub13),
+ elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
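
The Constants class above encodes single-precision immediates as raw IEEE-754 bit patterns. A host-side verification sketch of those three values (checking code only, not part of the backend):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static float bitsToFloat(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof F);   // reinterpret the IEEE-754 bit pattern
  return F;
}

int main() {
  assert(std::fabs(bitsToFloat(0x40c90fdb) - 6.2831853f)  < 1e-4f); // TWO_PI
  assert(std::fabs(bitsToFloat(0x40490fdb) - 3.1415927f)  < 1e-4f); // PI
  assert(std::fabs(bitsToFloat(0x3e22f983) - 0.15915494f) < 1e-6f); // 1/(2*PI)
  return 0;
}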
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
new file mode 100644
index 000000000000..eecb25b04f79
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -0,0 +1,60 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
new file mode 100644
index 000000000000..1dc1c657dfe5
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -0,0 +1,83 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
+ Ctx(ctx)
+{ }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_FPImmediate: {
+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
+ "Only floating point immediates are supported at the moment.");
+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AMDGPUMCInstLower MCInstLowering(OutContext);
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ MCInst MCBundleInst;
+ const MachineInstr *BundledInst = I;
+ MCInstLowering.lower(BundledInst, MCBundleInst);
+ OutStreamer.EmitInstruction(MCBundleInst);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+}
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
new file mode 100644
index 000000000000..d7d538e92599
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -0,0 +1,34 @@
+//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_MCINSTLOWER_H
+#define AMDGPU_MCINSTLOWER_H
+
+namespace llvm {
+
+class MCInst;
+class MCContext;
+class MachineInstr;
+
+class AMDGPUMCInstLower {
+
+ MCContext &Ctx;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx);
+
+ /// \brief Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_MCINSTLOWER_H
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 000000000000..0223ec8e4f3f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+ MachineFunctionInfo() {
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
+
+}
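
StringRef::getAsInteger returns true on failure, so the constructor above only trips llvm_unreachable when the frontend attached a malformed "ShaderType" value. A standalone sketch of that parsing convention; the parseShaderType wrapper is invented for illustration:

#include "llvm/ADT/StringRef.h"
#include <cassert>

// Radix 0 lets getAsInteger auto-detect decimal/hex/octal prefixes.
static unsigned parseShaderType(llvm::StringRef Str) {
  unsigned ShaderType = 0;
  bool Failed = Str.getAsInteger(0, ShaderType); // true means parse failure
  assert(!Failed && "Can't parse shader type!");
  (void)Failed;
  return ShaderType;
}

int main() {
  assert(parseShaderType("1") == 1); // the numeric meaning is frontend-defined
  return 0;
}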
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 000000000000..21c8c51dae45
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- AMDGPUMachineFunction.h - AMDGPU Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+ static const char *ShaderTypeAttribute;
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
new file mode 100644
index 000000000000..fe994d2d05a1
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -0,0 +1,75 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPUGenRegisterInfo(0),
+ TM(tm),
+ TII(tii)
+ { }
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
+// they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(!"Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(!"Subroutines not supported yet");
+ return 0;
+}
+
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+
+ switch(IndirectIndex) {
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ case 4: return AMDGPU::sub4;
+ case 5: return AMDGPU::sub5;
+ case 6: return AMDGPU::sub6;
+ case 7: return AMDGPU::sub7;
+ case 8: return AMDGPU::sub8;
+ case 9: return AMDGPU::sub9;
+ case 10: return AMDGPU::sub10;
+ case 11: return AMDGPU::sub11;
+ case 12: return AMDGPU::sub12;
+ case 13: return AMDGPU::sub13;
+ case 14: return AMDGPU::sub14;
+ case 15: return AMDGPU::sub15;
+ default: llvm_unreachable("indirect index out of range");
+ }
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
new file mode 100644
index 000000000000..1fc88e7455b9
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -0,0 +1,66 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H
+#define AMDGPUREGISTERINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
+ TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ static const uint16_t CalleeSavedReg;
+
+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns The ISA reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * RC) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUREGISTERINFO_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td
new file mode 100644
index 000000000000..b5aca0347fb0
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -0,0 +1,25 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+
+foreach Index = 0-15 in {
+ def sub#Index : SubRegIndex;
+}
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
new file mode 100644
index 000000000000..dea43b874c6f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -0,0 +1,896 @@
+//===-- AMDGPUStructurizeCFG.cpp - Structurize the CFG --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The pass implemented in this file transforms the program's control flow
+/// graph into a form that's suitable for code generation on hardware that
+/// implements control flow by execution masking. This currently includes all
+/// AMD GPUs but may also be useful for other types of hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BranchInst*, 8> BranchVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef SmallPtrSet<BasicBlock *, 8> BBSet;
+
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
+typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
+
+// The name for newly created blocks.
+
+static const char *FlowBlockName = "Flow";
+
+/// @brief Find the nearest common dominator for multiple BasicBlocks
+///
+/// Helper class for AMDGPUStructurizeCFG
+/// TODO: Maybe move into common code
+class NearestCommonDominator {
+
+ DominatorTree *DT;
+
+ DTN2UnsignedMap IndexMap;
+
+ BasicBlock *Result;
+ unsigned ResultIndex;
+ bool ExplicitMentioned;
+
+public:
+ /// \brief Start a new query
+ NearestCommonDominator(DominatorTree *DomTree) {
+ DT = DomTree;
+ Result = 0;
+ }
+
+ /// \brief Add BB to the resulting dominator
+ void addBlock(BasicBlock *BB, bool Remember = true) {
+
+ DomTreeNode *Node = DT->getNode(BB);
+
+ if (Result == 0) {
+ unsigned Numbering = 0;
+ for (;Node;Node = Node->getIDom())
+ IndexMap[Node] = ++Numbering;
+ Result = BB;
+ ResultIndex = 1;
+ ExplicitMentioned = Remember;
+ return;
+ }
+
+ for (;Node;Node = Node->getIDom())
+ if (IndexMap.count(Node))
+ break;
+ else
+ IndexMap[Node] = 0;
+
+ assert(Node && "Dominator tree invalid!");
+
+ unsigned Numbering = IndexMap[Node];
+ if (Numbering > ResultIndex) {
+ Result = Node->getBlock();
+ ResultIndex = Numbering;
+ ExplicitMentioned = Remember && (Result == BB);
+ } else if (Numbering == ResultIndex) {
+ ExplicitMentioned |= Remember;
+ }
+ }
+
+ /// \brief Is "Result" one of the BBs added with "Remember" = True?
+ bool wasResultExplicitMentioned() {
+ return ExplicitMentioned;
+ }
+
+ /// \brief Get the query result
+ BasicBlock *getResult() {
+ return Result;
+ }
+};
+
+/// @brief Transforms the control flow graph on one single entry/exit region
+/// at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/
+/// 3
+/// || Where:
+/// | | 1 = "If" block, calculates the condition
+/// 4 | 2 = "Then" subregion, runs if the condition is true
+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/ 4 = "Else" optional subregion, runs if the condition is false
+/// 5 5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region.
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incoming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// Additionally, even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^ Where:
+/// | / 1 = "Entry" block
+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3 3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consists of a network of PHI nodes where the true incoming values express
+/// breaks and the false values express continue states.
+class AMDGPUStructurizeCFG : public RegionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+
+ Function *Func;
+ Region *ParentRegion;
+
+ DominatorTree *DT;
+
+ RNVector Order;
+ BBSet Visited;
+
+ BBPhiMap DeletedPhis;
+ BB2BBVecMap AddedPhis;
+
+ PredMap Predicates;
+ BranchVector Conditions;
+
+ BB2BBMap Loops;
+ PredMap LoopPreds;
+ BranchVector LoopConds;
+
+ RegionNode *PrevNode;
+
+ void orderNodes();
+
+ void analyzeLoops(RegionNode *N);
+
+ Value *invert(Value *Condition);
+
+ Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+
+ void gatherPredicates(RegionNode *N);
+
+ void collectInfos();
+
+ void insertConditions(bool Loops);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void setPhiValues();
+
+ void killTerminator(BasicBlock *BB);
+
+ void changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator);
+
+ BasicBlock *getNextFlow(BasicBlock *Dominator);
+
+ BasicBlock *needPrefix(bool NeedEmpty);
+
+ BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
+
+ void setPrevNode(BasicBlock *BB);
+
+ bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
+
+ bool isPredictableTrue(RegionNode *Node);
+
+ void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void createFlow();
+
+ void rebuildSSA();
+
+public:
+ AMDGPUStructurizeCFG():
+ RegionPass(ID) {
+
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ using Pass::doInitialization;
+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU simplify control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ RegionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char AMDGPUStructurizeCFG::ID = 0;
+
+/// \brief Initialize the types and constants used in the pass
+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ return false;
+}
+
+/// \brief Build up the general order of nodes
+void AMDGPUStructurizeCFG::orderNodes() {
+ scc_iterator<Region *> I = scc_begin(ParentRegion),
+ E = scc_end(ParentRegion);
+ for (Order.clear(); I != E; ++I) {
+ std::vector<RegionNode *> &Nodes = *I;
+ Order.append(Nodes.begin(), Nodes.end());
+ }
+}
+
+/// \brief Determine the end of the loops
+void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
+
+ if (N->isSubRegion()) {
+ // Test for exit as back edge
+ BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+ if (Visited.count(Exit))
+ Loops[Exit] = N->getEntry();
+
+ } else {
+ // Test for successors as back edges
+ BasicBlock *BB = N->getNodeAs<BasicBlock>();
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
+ }
+ }
+}
+
+/// \brief Invert the given condition
+Value *AMDGPUStructurizeCFG::invert(Value *Condition) {
+
+ // First: Check if it's a constant
+ if (Condition == BoolTrue)
+ return BoolFalse;
+
+ if (Condition == BoolFalse)
+ return BoolTrue;
+
+ if (Condition == BoolUndef)
+ return BoolUndef;
+
+ // Second: If the condition is already inverted, return the original value
+ if (match(Condition, m_Not(m_Value(Condition))))
+ return Condition;
+
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
+
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
+
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+}
+
+/// \brief Build the condition for one edge
+Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+ bool Invert) {
+ Value *Cond = Invert ? BoolFalse : BoolTrue;
+ if (Term->isConditional()) {
+ Cond = Term->getCondition();
+
+ if (Idx != Invert)
+ Cond = invert(Cond);
+ }
+ return Cond;
+}
+
+/// \brief Analyze the predecessors of each block and build up predicates
+void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) {
+
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = N->getEntry();
+ BBPredicates &Pred = Predicates[BB];
+ BBPredicates &LPred = LoopPreds[BB];
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+
+ // Ignore it if it's a branch from outside into our region entry
+ if (!ParentRegion->contains(*PI))
+ continue;
+
+ Region *R = RI->getRegionFor(*PI);
+ if (R == ParentRegion) {
+
+ // It's a top level block in our region
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+
+ if (Visited.count(*PI)) {
+ // Normal forward edge
+ if (Term->isConditional()) {
+ // Try to treat it like an ELSE block
+ BasicBlock *Other = Term->getSuccessor(!i);
+ if (Visited.count(Other) && !Loops.count(Other) &&
+ !Pred.count(Other) && !Pred.count(*PI)) {
+
+ Pred[Other] = BoolFalse;
+ Pred[*PI] = BoolTrue;
+ continue;
+ }
+ }
+ Pred[*PI] = buildCondition(Term, i, false);
+
+ } else {
+ // Back edge
+ LPred[*PI] = buildCondition(Term, i, true);
+ }
+ }
+
+ } else {
+
+ // It's an exit from a sub region
+ while(R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ // Edge from inside a subregion to its entry, ignore it
+ if (R == N)
+ continue;
+
+ BasicBlock *Entry = R->getEntry();
+ if (Visited.count(Entry))
+ Pred[Entry] = BoolTrue;
+ else
+ LPred[Entry] = BoolFalse;
+ }
+ }
+}
+
+/// \brief Collect various loop and predicate infos
+void AMDGPUStructurizeCFG::collectInfos() {
+
+ // Reset predicate
+ Predicates.clear();
+
+ // and loop infos
+ Loops.clear();
+ LoopPreds.clear();
+
+ // Reset the visited nodes
+ Visited.clear();
+
+ for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ OI != OE; ++OI) {
+
+ // Analyze all the conditions leading to a node
+ gatherPredicates(*OI);
+
+ // Remember that we've seen this node
+ Visited.insert((*OI)->getEntry());
+
+ // Find the last back edges
+ analyzeLoops(*OI);
+ }
+}
+
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions(bool Loops) {
+ BranchVector &Conds = Loops ? LoopConds : Conditions;
+ Value *Default = Loops ? BoolTrue : BoolFalse;
+ SSAUpdater PhiInserter;
+
+ for (BranchVector::iterator I = Conds.begin(),
+ E = Conds.end(); I != E; ++I) {
+
+ BranchInst *Term = *I;
+ assert(Term->isConditional());
+
+ BasicBlock *Parent = Term->getParent();
+ BasicBlock *SuccTrue = Term->getSuccessor(0);
+ BasicBlock *SuccFalse = Term->getSuccessor(1);
+
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
+ PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+
+ BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(Parent, false);
+
+ Value *ParentValue = 0;
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (PI->first == Parent) {
+ ParentValue = PI->second;
+ break;
+ }
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ Dominator.addBlock(PI->first);
+ }
+
+ if (ParentValue) {
+ Term->setCondition(ParentValue);
+ } else {
+ if (!Dominator.wasResultExplicitMentioned())
+ PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
+
+ Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ }
+ }
+}
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+ PhiMap &Map = DeletedPhis[To];
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ while (Phi.getBasicBlockIndex(From) != -1) {
+ Value *Deleted = Phi.removeIncomingValue(From, false);
+ Map[&Phi].push_back(std::make_pair(From, Deleted));
+ }
+ }
+}
+
+/// \brief Add a dummy PHI value as soon as we know the new predecessor
+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ Value *Undef = UndefValue::get(Phi.getType());
+ Phi.addIncoming(Undef, From);
+ }
+ AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void AMDGPUStructurizeCFG::setPhiValues() {
+
+ SSAUpdater Updater;
+ for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+ AI != AE; ++AI) {
+
+ BasicBlock *To = AI->first;
+ BBVector &From = AI->second;
+
+ if (!DeletedPhis.count(To))
+ continue;
+
+ PhiMap &Map = DeletedPhis[To];
+ for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+ PI != PE; ++PI) {
+
+ PHINode *Phi = PI->first;
+ Value *Undef = UndefValue::get(Phi->getType());
+ Updater.Initialize(Phi->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(To, Undef);
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(To, false);
+ for (BBValueVector::iterator VI = PI->second.begin(),
+ VE = PI->second.end(); VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ Dominator.addBlock(VI->first);
+ }
+
+ if (!Dominator.wasResultExplicitMentioned())
+ Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+ for (BBVector::iterator FI = From.begin(), FE = From.end();
+ FI != FE; ++FI) {
+
+ int Idx = Phi->getBasicBlockIndex(*FI);
+ assert(Idx != -1);
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ }
+ }
+
+ DeletedPhis.erase(To);
+ }
+ assert(DeletedPhis.empty());
+}
+
+/// \brief Remove phi values from all successors and then remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// \brief Let node exit(s) point to NewExit
+void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator) {
+
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *OldExit = SubRegion->getExit();
+ BasicBlock *Dominator = 0;
+
+ // Find all the edges from the sub region to the exit
+ for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
+ I != E;) {
+
+ BasicBlock *BB = *I++;
+ if (!SubRegion->contains(BB))
+ continue;
+
+ // Modify the edges to point to the new exit
+ delPhiValues(BB, OldExit);
+ BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
+ addPhiValues(BB, NewExit);
+
+ // Find the new dominator (if requested)
+ if (IncludeDominator) {
+ if (!Dominator)
+ Dominator = BB;
+ else
+ Dominator = DT->findNearestCommonDominator(Dominator, BB);
+ }
+ }
+
+ // Change the dominator (if requested)
+ if (Dominator)
+ DT->changeImmediateDominator(NewExit, Dominator);
+
+ // Update the region info
+ SubRegion->replaceExit(NewExit);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(NewExit, BB);
+ addPhiValues(BB, NewExit);
+ if (IncludeDominator)
+ DT->changeImmediateDominator(NewExit, BB);
+ }
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) {
+ LLVMContext &Context = Func->getContext();
+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+ Order.back()->getEntry();
+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+ Func, Insert);
+ DT->addNewBlock(Flow, Dominator);
+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+ return Flow;
+}
+
+/// \brief Create a new or reuse the previous node as flow node
+BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) {
+
+ BasicBlock *Entry = PrevNode->getEntry();
+
+ if (!PrevNode->isSubRegion()) {
+ killTerminator(Entry);
+ if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
+ return Entry;
+ }
+
+ // create a new flow node
+ BasicBlock *Flow = getNextFlow(Entry);
+
+ // and wire it up
+ changeExit(PrevNode, Flow, true);
+ PrevNode = ParentRegion->getBBNode(Flow);
+ return Flow;
+}
+
+/// \brief Returns the region exit if possible, otherwise just a new flow node
+BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow,
+ bool ExitUseAllowed) {
+
+ if (Order.empty() && ExitUseAllowed) {
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
+ }
+ return getNextFlow(Flow);
+}
+
+/// \brief Set the previous node
+void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) {
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+}
+
+/// \brief Does BB dominate all the predicates of Node?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(BB, PI->first))
+ return false;
+ }
+ return true;
+}
+
+/// \brief Can we predict that this node will always be called?
+bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) {
+
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ bool Dominated = false;
+
+ // Region entry is always true
+ if (PrevNode == 0)
+ return true;
+
+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+
+ if (I->second != BoolTrue)
+ return false;
+
+ if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
+ Dominated = true;
+ }
+
+ // TODO: The dominator check is too strict
+ return Dominated;
+}
+
+/// Take one node from the order vector and wire it up
+void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+
+ RegionNode *Node = Order.pop_back_val();
+ Visited.insert(Node->getEntry());
+
+ if (isPredictableTrue(Node)) {
+ // Just a linear flow
+ if (PrevNode) {
+ changeExit(PrevNode, Node->getEntry(), true);
+ }
+ PrevNode = Node;
+
+ } else {
+ // Insert extra prefix node (or reuse last one)
+ BasicBlock *Flow = needPrefix(false);
+
+ // Insert extra postfix node (or use exit instead)
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
+
+ // let it point to entry and next block
+ Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ addPhiValues(Flow, Entry);
+ DT->changeImmediateDominator(Entry, Flow);
+
+ PrevNode = Node;
+ while (!Order.empty() && !Visited.count(LoopEnd) &&
+ dominatesPredicates(Entry, Order.back())) {
+ handleLoops(false, LoopEnd);
+ }
+
+ changeExit(PrevNode, Next, false);
+ setPrevNode(Next);
+ }
+}
+
+void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.back();
+ BasicBlock *LoopStart = Node->getEntry();
+
+ if (!Loops.count(LoopStart)) {
+ wireFlow(ExitUseAllowed, LoopEnd);
+ return;
+ }
+
+ if (!isPredictableTrue(Node))
+ LoopStart = needPrefix(true);
+
+ LoopEnd = Loops[Node->getEntry()];
+ wireFlow(false, LoopEnd);
+ while (!Visited.count(LoopEnd)) {
+ handleLoops(false, LoopEnd);
+ }
+
+ // Create an extra loop end node
+ LoopEnd = needPrefix(false);
+ BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
+ LoopConds.push_back(BranchInst::Create(Next, LoopStart,
+ BoolUndef, LoopEnd));
+ addPhiValues(LoopEnd, LoopStart);
+ setPrevNode(Next);
+}
+
+/// After this function control flow looks like it should be, but
+/// branches and PHI nodes only have undefined conditions.
+void AMDGPUStructurizeCFG::createFlow() {
+
+ BasicBlock *Exit = ParentRegion->getExit();
+ bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
+
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Conditions.clear();
+ LoopConds.clear();
+
+ PrevNode = 0;
+ Visited.clear();
+
+ while (!Order.empty()) {
+ handleLoops(EntryDominatesExit, 0);
+ }
+
+ if (PrevNode)
+ changeExit(PrevNode, Exit, EntryDominatesExit);
+ else
+ assert(EntryDominatesExit);
+}
+
+/// Handle a rare case where the disintegrated nodes' instructions
+/// no longer dominate all their uses. Not sure if this is really necessary.
+void AMDGPUStructurizeCFG::rebuildSSA() {
+ SSAUpdater Updater;
+ for (Region::block_iterator I = ParentRegion->block_begin(),
+ E = ParentRegion->block_end();
+ I != E; ++I) {
+
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ bool Initialized = false;
+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+ Next = I->getNext();
+
+ Instruction *User = cast<Instruction>(I->getUser());
+ if (User->getParent() == BB) {
+ continue;
+
+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(*I) == BB)
+ continue;
+ }
+
+ if (DT->dominates(II, User))
+ continue;
+
+ if (!Initialized) {
+ Value *Undef = UndefValue::get(II->getType());
+ Updater.Initialize(II->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(BB, II);
+ Initialized = true;
+ }
+ Updater.RewriteUseAfterInsertions(*I);
+ }
+ }
+ }
+}
+
+/// \brief Run the transformation for each region found
+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ Func = R->getEntry()->getParent();
+ ParentRegion = R;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ orderNodes();
+ collectInfos();
+ createFlow();
+ insertConditions(false);
+ insertConditions(true);
+ setPhiValues();
+ rebuildSSA();
+
+ // Cleanup
+ Order.clear();
+ Visited.clear();
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Predicates.clear();
+ Conditions.clear();
+ Loops.clear();
+ LoopPreds.clear();
+ LoopConds.clear();
+
+ return true;
+}
+
+/// \brief Create the pass
+Pass *llvm::createAMDGPUStructurizeCFGPass() {
+ return new AMDGPUStructurizeCFG();
+}
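
The NearestCommonDominator helper near the top of this file amortizes its work: the first block numbers its entire path to the root, and every later block walks upward only until it reaches an already-numbered ancestor. A toy reduction of that two-phase walk to a bare parent-pointer tree; the Node type here is invented for illustration:

#include <cassert>
#include <map>

struct Node { Node *IDom; }; // immediate dominator; the root has IDom == nullptr

static Node *nearestCommonDominator(Node *A, Node *B) {
  std::map<Node *, unsigned> Index;
  unsigned N = 0;
  for (Node *P = A; P; P = P->IDom)   // phase 1: number A's path to the root
    Index[P] = ++N;
  for (Node *P = B; P; P = P->IDom)   // phase 2: first numbered ancestor wins
    if (Index.count(P))
      return P;
  return nullptr; // disconnected; the real helper asserts "Dominator tree invalid!"
}

int main() {
  Node Root = {nullptr}, L = {&Root}, R = {&Root}, LL = {&L};
  assert(nearestCommonDominator(&LL, &L) == &L);
  assert(nearestCommonDominator(&LL, &R) == &Root);
  return 0;
}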
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
new file mode 100644
index 000000000000..0f356a1c3f11
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
+ InstrItins = getInstrItineraryForCPU(CPU);
+
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ StringRef GPU = CPU;
+ Is64bit = false;
+ DefaultSize[0] = 64;
+ DefaultSize[1] = 1;
+ DefaultSize[2] = 1;
+ ParseSubtargetFeatures(GPU, FS);
+ DevName = GPU;
+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
+}
+
+AMDGPUSubtarget::~AMDGPUSubtarget() {
+ delete Device;
+}
+
+bool
+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDGPUSubtarget::is64bit() const {
+ return Is64bit;
+}
+bool
+AMDGPUSubtarget::isTargetELF() const {
+ return false;
+}
+size_t
+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
+ if (dim > 3) {
+ return 1;
+ } else {
+ return DefaultSize[dim];
+ }
+}
+
+std::string
+AMDGPUSubtarget::getDataLayout() const {
+ if (!Device) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return Device->getDataLayout();
+}
+
+std::string
+AMDGPUSubtarget::getDeviceName() const {
+ return DevName;
+}
+const AMDGPUDevice *
+AMDGPUSubtarget::device() const {
+ return Device;
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
new file mode 100644
index 000000000000..1973fc6d544c
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -0,0 +1,65 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUSUBTARGET_H
+#define AMDGPUSUBTARGET_H
+#include "AMDILDevice.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+private:
+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
+ const AMDGPUDevice *Device;
+ size_t DefaultSize[3];
+ std::string DevName;
+ bool Is64bit;
+ bool Is32on64bit;
+ bool DumpCode;
+ bool R600ALUInst;
+
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual ~AMDGPUSubtarget();
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ const AMDGPUDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ bool dumpCode() const { return DumpCode; }
+ bool r600ALUEncoding() const { return R600ALUInst; }
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUSUBTARGET_H
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
new file mode 100644
index 000000000000..e7ea876e2abb
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -0,0 +1,164 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU target machine contains all of the hardware specific
+/// information needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeR600Target() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ Layout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+ }
+ }
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
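+// Pass pipeline summary (derived from the overrides below): pre-SI devices
+// (generation <= HD6XXX) run the AMDIL CFG preparation/structurizer passes
+// and the R600 clause and control-flow finalization passes, while later
+// devices use the structurize-CFG, SI annotate, and SI lower control-flow
+// path instead.
+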
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUStructurizeCFGPass());
+ addPass(createSIAnnotateControlFlowPass());
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUPeepholeOpt(*TM));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ // The callbacks this pass uses are not implemented yet on SI.
+ addPass(createAMDGPUIndirectAddressingPass(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIInsertWaits(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+ addPass(&IfConverterID);
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUCFGPreparationPass(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600EmitClauseMarkers(*TM));
+ addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
+ addPass(&FinalizeMachineBundlesID);
+ } else {
+ addPass(createSILowerControlFlowPass(*TM));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
new file mode 100644
index 000000000000..2afe7873a90c
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -0,0 +1,70 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const DataLayout Layout;
+ AMDGPUFrameLowering FrameLowering;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo *InstrInfo;
+ AMDGPUTargetLowering *TLInfo;
+ const InstrItineraryData *InstrItins;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const { return InstrInfo; }
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering *getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const DataLayout *getDataLayout() const { return &Layout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
new file mode 100644
index 000000000000..39ab664d1018
--- /dev/null
+++ b/lib/Target/R600/AMDIL.h
@@ -0,0 +1,121 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// This file contains the entry points for global functions defined in the LLVM
+/// AMDGPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H
+#define AMDIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next three values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// Instruction selection passes.
+FunctionPass*
+ createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+// Pre emit passes.
+FunctionPass*
+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm
+
+// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ REGION_ADDRESS = 4, ///< Address space for region memory.
+ ADDRESS_NONE = 5, ///< Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0).
+ PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1).
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ LAST_ADDRESS = 24
+};
+
+} // namespace AMDGPUAS
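+
+// Example (illustration, not from the original patch): IR-level code can
+// test these address spaces through the pointer type, e.g.
+//   if (const PointerType *PT = dyn_cast<PointerType>(V->getType()))
+//     IsGlobal = PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;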
+
+} // end namespace llvm
+#endif // AMDIL_H
diff --git a/lib/Target/R600/AMDIL7XXDevice.cpp b/lib/Target/R600/AMDIL7XXDevice.cpp
new file mode 100644
index 000000000000..ea6ac34f570c
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.cpp
@@ -0,0 +1,115 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ DeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ DeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ DeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
+}
+
+void AMDGPU7XXDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDGPU7XXDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDGPU7XXDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
+ return 1;
+}
+
+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
+ setCaps();
+}
+
+AMDGPU770Device::~AMDGPU770Device() {
+}
+
+void AMDGPU770Device::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU770Device::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
+}
+
+AMDGPU710Device::~AMDGPU710Device() {
+}
+
+size_t AMDGPU710Device::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDIL7XXDevice.h b/lib/Target/R600/AMDIL7XXDevice.h
new file mode 100644
index 000000000000..1cf4ca415a4c
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.h
@@ -0,0 +1,72 @@
+//==-- AMDIL7XXDevice.h - Define the 7XX Device for AMDIL ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file defines the interface that each generation needs to implement
+/// in order to correctly answer queries about the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDIL7XXDEVICEIMPL_H
+#define AMDIL7XXDEVICEIMPL_H
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
+///
+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
+/// support the minimal features that are required to be considered OpenCL 1.0
+/// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU770Device class represents the RV770 chip and its
+/// derivative cards.
+///
+/// The difference between this device and the base class is that this device
+/// adds support for double precision and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU770Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU710Device class derives from the 7XX base class.
+///
+/// This class is a smaller derivative, so we need to override some of the
+/// functions in order to correctly specify this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU710Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU710Device();
+ virtual size_t getWavefrontSize() const;
+};
+
+} // namespace llvm
+#endif // AMDIL7XXDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
new file mode 100644
index 000000000000..c12cedcf7fd5
--- /dev/null
+++ b/lib/Target/R600/AMDILBase.td
@@ -0,0 +1,85 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDGPUDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "Is64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "Is32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding.">;
+
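+// Note (illustration, not from the original patch): the strings above are
+// the names -mattr accepts; e.g. llc -mattr=+fp64 sets
+// CapsOverride[AMDGPUDeviceInfo::DoubleOps], which the device classes then
+// consult through AMDGPUSubtarget::isOverride().
+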
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILInstrInfo.td"
+
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
new file mode 100644
index 000000000000..b0cd0f9756a4
--- /dev/null
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3051 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct {
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
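+// Reverse Src in place; equivalent to std::reverse(Src.begin(), Src.end()).
+// Only used under STRESSTEST to feed the structurizer a worst-case block
+// ordering.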
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need a "reg = 0" init before
+ //WHILELOOP(thisloop), i.e. before entering
+ //thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need a "reg = 0" init after
+ //WHILELOOP(thisloop), i.e. after entering
+ //thisloop.
+ std::set<RegiT> endbranchInitRegs; //Registers initialized before entering
+ //this loop; at the loop land block, the
+ //branch condition is on one of these regs.
+ std::set<RegiT> breakOnRegs; //Registers that need an "if (reg) break
+ //endif" after ENDLOOP(thisloop) to break to
+ //outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //Registers that need an "if (reg) continue
+ //endif" after ENDLOOP(thisloop) to continue
+ //to outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer {
+public:
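+ // Result kind of singlePathTo(): whether a single control-flow path exists
+ // from a source block to a destination block and, if so, whether the
+ // destination itself lies on that path.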
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+private:
+ void reversePredicateSetter(typename BlockT::iterator);
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDGPURegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::prepare\n";
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::run\n";
+ func.viewCFG();
+ }
+
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+#ifdef STRESSTEST
+ //Use the worst block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
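+ // Fixpoint iteration: run pattern matching over the blocks one SCC at a
+ // time, restarting an SCC while it keeps shrinking, and stop once the
+ // function is reduced to a single block or a full pass makes no progress.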
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // Number of active blocks in the current SCC;
+ // reset to the maximum possible when a new SCC begins.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ }
+
+ if (!finish) {
+ assert(!"IRREDUCIBL_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ // Walk through all the blocks in the function to check for unreachable
+ // blocks.
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ }
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
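+// Merge the trivial straight-line pattern: when curBlk has exactly one
+// successor and that successor has exactly one predecessor (and is not an
+// active loop header), fold the successor into curBlk.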
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
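+// Recognize a two-way branch whose arms reconverge, possibly with one arm
+// empty or falling through into the other, and merge it into structured
+// if/else/endif form; irregular shapes are delegated to handleJumpintoIf().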
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but
+ // the new BB created for landBlk == NULL may introduce new challenges to
+ // the reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loops outside->inside, so a "continue" to an outside loop
+ // won't be mistaken for a "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
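+// Match the break-style exits of loopRep: compute the exit blocks, find (or
+// synthesize) a single landing block, clone away side entries, then fold each
+// break into its exiting block. Returns -1 if the loop cannot be reduced yet.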
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+
+ // TODO: Simplify, maybe separate function?
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold each break into its breaking block; this also handles breaks that
+ // cross loop levels.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+ // the same loop that already has a LoopLandInfo entry. Without explicitly
+ // tracking loopContBlks and loopBreakBlks, this is a way to recover that
+ // information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
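+// Walk from trueBlk down its chain of single successors looking for a block
+// that falseBlk also reaches along a single path; once found, clone the side
+// entries so the region rooted at headBlk becomes a proper if-then-else.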
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ // trueBlk could be the common post dominator.
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0);
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIfImp
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate whichever
+ // of them has more than one predecessor. Without doing this, a predecessor
+ // other than headBlk would leave an undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ //                       {initReg = 0; org falseBlk branch}
+ //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ //      => org landBlk
+ //
+ // If landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg != 2) {...}
+ //
+ // Add initReg = initVal to headBlk.
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ // Insert AMDGPU::ENDIF to avoid the special case "input landBlk == NULL".
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_PREDICATE_SET, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // Need to unconditionally insert the assignment so that a path from a
+ // predecessor other than headBlk still has a valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // Need to unconditionally insert the assignment so that a path from a
+ // predecessor other than headBlk still has a valid value in initReg if
+ // (initVal != 0).
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+} //handleLoopcontBlock
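+
+// Illustrative protocol (a sketch): to continue a loop contLoop from a block
+// nested in deeper loops, initReg is set to 1 at the continue point; every
+// intermediate loop breaks on initReg (addLoopBreakOnReg) until control
+// reaches the loop directly inside contLoop, which continues on initReg
+// (addLoopContOnReg).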
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+  // Loop contInitRegs are initialized at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+  /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+   * AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
+   * Search for the DebugLoc in that statement; if not found, we have to
+   * use the empty/default DebugLoc. */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+  // Loop breakInitRegs are initialized before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+  // Loop endbranchInitRegs are initialized before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+  /* We last inserted the DebugLoc in the continue statement in the current
+   * dstBlk. Search for the DebugLoc in that statement; if not found, we
+   * have to use the empty/default DebugLoc. */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+  // Loop breakOnRegs are checked after the ENDLOOP: they break out of the
+  // loop enclosing this loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
+ *iter);
+ }
+
+  // Loop contOnRegs are checked after the ENDLOOP: they continue the loop
+  // enclosing this loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
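+
+// Illustrative layout of dstBlk after the merge above (a sketch, not literal
+// output):
+//   breakInitRegs = 0, endbranchInitRegs = 0   ; before entering the loop
+//   WHILELOOP
+//   contInitRegs = 0                           ; start of each iteration
+//   <original dstBlk body>
+//   ENDLOOP
+//   PREDICATED_BREAK on breakOnRegs            ; break the enclosing loop
+//   CONTINUE_LOGICALNZ_i32 on contOnRegs       ; continue the enclosing loop
+//   <instructions spliced from landBlk>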
+
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
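+
+// Example: a PRED_X whose condition-code operand is OPCODE_IS_ZERO_INT
+// becomes OPCODE_IS_NOT_ZERO_INT (and vice versa), flipping which successor
+// the predicate selects without rewriting the branch itself.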
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ } else {
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ } //if_logical
+
+  // Now branchInstr can be erased safely.
+ branchInstr->eraseFromParent();
+
+  // Now take care of successors and retire blocks.
+ exitingBlk->removeSuccessor(exitBlk);
+  if (exitBlk != exitLandBlk) {
+    exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+    if (!useContinueLogical) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+        // insertEnd to ensure phi-moves, if any, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+        // insertEnd to ensure phi-moves, if any, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ branchInstr->eraseFromParent();
+ } else {
+    // If we've arrived here then the branch instruction has already been
+    // erased. Walk back through the basic block for its last recorded debug
+    // location; the instructions inserted here reuse it, so it should be
+    // representative.
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+      // insertEnd to ensure phi-moves, if any, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+      // insertEnd to ensure phi-moves, if any, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// The BBs in exitBlkSet lie on the break path of loopRep. Before we can
+// place code for these BBs inside the loop body of loopRep, check whether
+// any of them was determined earlier to be a cont-BB of parentLoopRep.
+// If so, generate a new BB newBlk and:
+// (1) make newBlk the common successor of the BBs in exitBlkSet;
+// (2) change the continue-instr in the BBs in exitBlkSet to a break-instr;
+// (3) generate a continue-instr in newBlk.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
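+
+// Illustrative before/after of the relocation above (a sketch):
+//   before:  endBlk_i: ... CONTINUE       (one per block in exitBlkSet)
+//   after:   endBlk_i: ... -> newBlk      (CONTINUE erased, edge added)
+//            newBlk:   CONTINUE           (single shared continue block)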
+
+
+// A LoopEndbranchBlock is a BB created by the CFGStructurizer for use as the
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop's exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+  // It is redundant to add reg = 0 to exitingBlks[0].
+
+  // For the 1..n-th exiting path (the last iteration handles two paths),
+  // create the branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+    // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
+    // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
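+
+// Illustrative shape of the dispatch chain built above for n exiting paths
+// (a sketch): each exitingBlks[i] with i >= 1 stores i into endBranchReg
+// (path 0 relies on the init value 0) and jumps to newLandBlk, which then
+// selects the matching exit block:
+//   newLandBlk: if (endBranchReg == 0) goto exitBlks[0] else goto b1
+//   b1:         if (endBranchReg == 1) goto exitBlks[1] else goto b2
+//   ...
+//   b(n-2):     if (endBranchReg == n-2) goto exitBlks[n-2]
+//               else goto exitBlks[n-1]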
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
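+
+// Example (a sketch): for srcBlk -> A -> dstBlk where every block along the
+// way has exactly one successor, the result is SinglePath_InPath. If the
+// chain ends at a block with no successors without reaching dstBlk, the
+// result is SinglePath_NotInPath. A side entry (pred_size() > 1) with
+// allowSideEntry == false yields Not_SinglePath.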
+
+// If there is a single path from srcBlk to dstBlk, return the last block
+// before dstBlk. If there is a single path from srcBlk to the function end
+// that does not pass through dstBlk, return the last block on that path.
+// Otherwise, return NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+  assert(predBlk->isSuccessor(curBlk) &&
+         "curBlk is not a successor of predBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+  // Add all successors to cloneBlk.
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
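+
+// Illustrative effect of the cloning above (a sketch): given
+// predBlk -> curBlk <- otherPred, curBlk is cloned so that
+// predBlk -> cloneBlk while otherPred -> curBlk still holds; cloneBlk
+// inherits all of curBlk's successors.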
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+  // Look for the input branch instruction, not the AMDGPU branch instruction.
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+  // splice inserts before insertPos.
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and returns the newly added dummy exit block.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder MIB(*funcRep, newInstr);
+ MIB.addMBB(loopHeader);
+ MIB.addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+  // A basic block can contain two unconditional branches (seen in the
+  // test_fc_do_while_or.c example); the upstream producer needs fixing so
+  // this loop can be removed.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
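+
+// Example of the heuristic above: a block of 60 instructions with 3
+// predecessors is migrated, since 60 > 30 and 60 * (3 - 1) = 120 > 100; a
+// 40-instruction block with 2 predecessors is not, since 40 * (2 - 1) = 40
+// does not exceed 100.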
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock(
+    typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+    typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available on
+// post-dominator trees; a proper fix should go into Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
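+
+// Example: in a diamond CFG A -> {B, C} -> D, the nearest common
+// post-dominator of B and C is D.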
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom(
+    typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDGPU
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+public:
+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+
+private:
+
+};
+
+} //end of namespace llvm
+
+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
+}
+
+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPrepare(TargetMachine &tm);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm ) {
+}
+const char *AMDGPUCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPerform(TargetMachine &tm);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
+: AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDGPUCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// This class is tailored to the AMDGPU backend.
+template<>
+struct CFGStructTraits<AMDGPUCFGStructurizer> {
+ typedef int RegiT;
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+    }
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    // Get the DebugLoc from the last instruction in the block that has one.
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+      if (!instr->getDebugLoc().isUnknown()) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+  // The correct name for this would be getPossibleLoopendBlockBranchInstr.
+  //
+  // A BB with a backward edge can have move instructions after the branch
+  // instruction; such moves "belong to" the loop backward edge.
+  //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!TII->isMov(instr->getOpcode())) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(oldInstr->getOperand(1).getReg(), false);
+
+ SHOWNEWINSTR(newInstr);
+    // oldInstr is erased later via oldInstr->eraseFromParent().
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+  } //insertAssignInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(dstReg, RegState::Define); //set target
+ MIB.addReg(src1Reg); //set src value
+ MIB.addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+  // MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
+  // because the AMDGPU branch instruction is not recognized as a terminator.
+  // Fix that and retire this routine.
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getTrueBranch(branchInstr) == oldBlk) {
+ setTrueBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+    // Collect each CONTINUE that appears right before an ENDLOOP.
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+    // Delete each CONTINUE that appears right before an ENDLOOP.
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+    // TODO: Fix up the jump table so a later phase won't be confused. If
+    // jumpTableInfo->isEmpty() is false, we need to clean the jump table,
+    // but there isn't such an interface yet; alternatively, replace all the
+    // other blocks in the jump table with the entryBlk.
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDGPUCFGPreparationPass - Returns a pass.
+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
+  return new AMDGPUCFGPrepare(tm);
+}
+
+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDGPUCFGStructurizerPass - Returns a pass.
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
+  return new AMDGPUCFGPerform(tm);
+}
+
+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp
new file mode 100644
index 000000000000..db8e01ea4043
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.cpp
@@ -0,0 +1,132 @@
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ DeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDGPUDevice::~AMDGPUDevice() {
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDGPUDevice::getMaxGDSSize() const {
+ return 0;
+}
+
+uint32_t
+AMDGPUDevice::getDeviceFlag() const {
+ return DeviceFlag;
+}
+
+size_t AMDGPUDevice::getMaxNumCBs() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxCBSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxScratchSize() const {
+ return 65536;
+}
+
+uint32_t AMDGPUDevice::getStackAlignment() const {
+ return 16;
+}
+
+void AMDGPUDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+}
+
+AMDGPUDeviceInfo::ExecutionMode
+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Software;
+ }
+
+  return AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
+}
+
+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
+}
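+
+// Typical usage (illustrative only; actual call sites live elsewhere):
+//   if (Dev->usesHardware(AMDGPUDeviceInfo::ConstantMem))
+//     ... emit native constant-buffer accesses ...
+//   else if (Dev->usesSoftware(AMDGPUDeviceInfo::ConstantMem))
+//     ... fall back to the emulated path ...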
+
+std::string
+AMDGPUDevice::getDataLayout() const {
+ std::string DataLayout = std::string(
+ "e"
+ "-p:32:32:32"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
+ "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n32:64"
+ );
+
+ if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
+ DataLayout.append("-f64:64:64");
+ }
+
+ return DataLayout;
+}
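+
+// For reference, the string above follows the LLVM data-layout grammar:
+// "e" marks little-endian, "-p:32:32:32" gives pointer size and alignments,
+// "-iN:A:P" / "-vN:A:P" give the ABI and preferred alignments of integer and
+// vector types, and "-n32:64" lists the native integer widths.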
diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h
new file mode 100644
index 000000000000..97df98cafb2a
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.h
@@ -0,0 +1,117 @@
+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+//
+/// This file defines the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEIMPL_H
+#define AMDILDEVICEIMPL_H
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDGPUDevice {
+public:
+ AMDGPUDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ /// \returns The max LDS size that the hardware supports. Size is in
+ /// bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ /// \returns The max GDS size that the hardware supports if the GDS is
+ /// supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ /// \returns The max number of hardware constant address spaces that
+ /// are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ /// \returns The max number of bytes a single hardware constant buffer
+ /// can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ /// \returns The max number of bytes allowed by the hardware scratch
+ /// buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ /// \brief Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ /// \returns The number of work-items that exist in a single hardware
+ /// wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ /// \brief Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ /// \brief Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ /// \brief Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+  /// \brief Get the max number of UAVs for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+  // API exposing the more detailed capabilities of each family of cards.
+  // If a capability is supported, then exactly one of usesHardware and
+  // usesSoftware returns true for it. Hardware execution means that the
+  // feature is done natively by the hardware and is not emulated in
+  // software. Software execution means that the feature could be done in
+  // the hardware, but is instead emulated in software (possibly with
+  // hardware assistance) because the hardware does not fully comply with
+  // the OpenCL specs.
+
+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ BitVector mHWBits;
+  BitVector mSWBits;
+ AMDGPUSubtarget *mSTM;
+ uint32_t DeviceFlag;
+private:
+ AMDGPUDeviceInfo::ExecutionMode
+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp
new file mode 100644
index 000000000000..9605fbe63340
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Function that creates an AMDGPUDevice from a device name and other
+/// information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDGPUDeviceInfo {
+
+AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
+ AMDGPUSubtarget *ptr,
+ bool is64bit, bool is64on32bit) {
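+  // R700-family names look like "rv710", "rv730" and "rv770", so dispatch
+  // on the third and fourth characters of the device name.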
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDGPU710Device(ptr);
+ case '7':
+ return new AMDGPU770Device(ptr);
+ default:
+ return new AMDGPU7XXDevice(ptr);
+ }
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPURedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCedarDevice(ptr);
+ } else if (deviceName == "barts" || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDGPUSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPU7XXDevice(ptr);
+ }
+}
+} // End namespace AMDGPUDeviceInfo
+} // End namespace llvm
diff --git a/lib/Target/R600/AMDILDeviceInfo.h b/lib/Target/R600/AMDILDeviceInfo.h
new file mode 100644
index 000000000000..4b2c3a53c79f
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.h
@@ -0,0 +1,88 @@
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEINFO_H
+#define AMDILDEVICEINFO_H
+
+
+#include <string>
+
+namespace llvm {
+ class AMDGPUDevice;
+ class AMDGPUSubtarget;
+ namespace AMDGPUDeviceInfo {
+    /// Each capability can be executed using a hardware instruction,
+ /// emulated with a sequence of software instructions, or not
+ /// supported at all.
+ enum ExecutionMode {
+      Unsupported = 0, ///< Unsupported feature on the card (default value).
+ /// This is the execution mode that is set if the feature is emulated in
+ /// software.
+ Software,
+ /// This execution mode is set if the feature exists natively in hardware
+ Hardware
+ };
+
+ enum Caps {
+ HalfOps = 0x1, ///< Half float is supported or not.
+ DoubleOps = 0x2, ///< Double is supported or not.
+      ByteOps = 0x3, ///< Byte (char) is supported or not.
+ ShortOps = 0x4, ///< Short is supported or not.
+ LongOps = 0x5, ///< Long is supported or not.
+ Images = 0x6, ///< Images are supported or not.
+      ByteStores = 0x7, ///< ByteStores available (!HD4XXX).
+ ConstantMem = 0x8, ///< Constant/CB memory.
+ LocalMem = 0x9, ///< Local/LDS memory.
+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
+ RegionMem = 0xB, ///< OCL GDS Memory Extension.
+ FMA = 0xC, ///< Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, ///< ReservedFlag
+ NoAlias = 0x10, ///< Cached loads.
+ Signed24BitOps = 0x11, ///< Peephole Optimization.
+      /// Debug mode implies that no hardware features or optimizations
+      /// are performed and that all memory accesses go through a single
+      /// uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12,
+ CachedMem = 0x13, ///< Cached mem is available or not.
+ BarrierDetect = 0x14, ///< Detect duplicate barriers.
+ Reserved1 = 0x15, ///< Reserved flag
+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
+ NoInline = 0x19, ///< Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
+ /// If more capabilities are required, then
+ /// this number needs to be increased.
+ /// All capabilities must come before this
+ /// number.
+ MaxNumberCapabilities = 0x20
+ };
+    /// These have to be kept in order, with the older generations
+    /// having lower enumeration values.
+ enum Generation {
+ HD4XXX = 0, ///< 7XX based devices.
+ HD5XXX, ///< Evergreen based devices.
+ HD6XXX, ///< NI/Evergreen+ based devices.
+ HD7XXX, ///< Southern Islands based devices.
+ HDTEST, ///< Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDGPUDevice*
+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
+ bool is64bit = false, bool is64on32bit = false);
+  } // namespace AMDGPUDeviceInfo
+} // namespace llvm
+#endif // AMDILDEVICEINFO_H
diff --git a/lib/Target/R600/AMDILDevices.h b/lib/Target/R600/AMDILDevices.h
new file mode 100644
index 000000000000..636fa6d35947
--- /dev/null
+++ b/lib/Target/R600/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDIL_DEVICES_H
+#define AMDIL_DEVICES_H
+// Include all of the device specific header files
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // AMDIL_DEVICES_H
diff --git a/lib/Target/R600/AMDILEvergreenDevice.cpp b/lib/Target/R600/AMDILEvergreenDevice.cpp
new file mode 100644
index 000000000000..c5213a041005
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.cpp
@@ -0,0 +1,169 @@
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
+: AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ DeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ DeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ DeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ DeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
+}
+
+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+  }
+ return 0;
+}
+
+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD5XXX;
+}
+
+void AMDGPUEvergreenDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Images);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
+}
+
+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
+}
+
+void AMDGPUCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+}
+
+
+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
+}
+
+void AMDGPUCedarDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPUCedarDevice::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
+
+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
+}
+
+void AMDGPURedwoodDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDILEvergreenDevice.h b/lib/Target/R600/AMDILEvergreenDevice.h
new file mode 100644
index 000000000000..ea90f774a856
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.h
@@ -0,0 +1,93 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILEVERGREENDEVICE_H
+#define AMDILEVERGREENDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
+/// series of cards.
+///
+/// This class contains information required to differentiate
+/// the Evergreen device from the generic AMDGPUDevice. This device represents
+/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDGPUEvergreenDevice : public AMDGPUDevice {
+public:
+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+};
+
+/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
+/// support for double precision operations. This device is used to represent
+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+/// and HD59XX cards.
+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCypressDevice();
+private:
+ virtual void setCaps();
+};
+
+
+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
+/// devices.
+///
+/// This class differs from the base AMDGPUEvergreenDevice in that the
+/// device is a ~quarter of the 'Juniper'. These are commercially known as the
+/// HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
+/// devices.
+///
+/// This class differs from the base class, in that these devices are
+/// considered about half of a 'Juniper' device. These are commercially known as
+/// the HD55XX and HD56XX series of cards.
+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPURedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+} // namespace llvm
+#endif // AMDILEVERGREENDEVICE_H
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fa8f62de9c0a
--- /dev/null
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,643 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget &Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+ virtual void PostprocessISelDAG();
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isParamLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
+  return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
+  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
+    // which adds a 128-bit register copy when going through the
+    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
+    // possible because they can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
+ case ISD::ConstantFP:
+ case ISD::Constant: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ // XXX: Custom immediate lowering not implemented yet. Instead we use
+ // pseudo instructions defined in SIInstructions.td
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+ uint64_t ImmValue = 0;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+
+ if (N->getOpcode() == ISD::ConstantFP) {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::f64);
+
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
+ APFloat Value = C->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = Value.bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::i64);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (C->getZExtValue() == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (C->getZExtValue() == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = C->getZExtValue();
+ }
+ }
+
+ for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
+ Use != SDNode::use_end(); Use = Next) {
+ Next = llvm::next(Use);
+ std::vector<SDValue> Ops;
+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
+ Ops.push_back(Use->getOperand(i));
+ }
+
+ if (!Use->isMachineOpcode()) {
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ // We can only use literal constants (e.g. AMDGPU::ZERO,
+ // AMDGPU::ONE, etc) in machine opcodes.
+ continue;
+ }
+ } else {
+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
+ (TII->get(Use->getMachineOpcode()).TSFlags &
+ R600_InstFlag::VECTOR)) {
+ continue;
+ }
+
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
+ assert(ImmIdx != -1);
+
+          // Subtract one from ImmIdx because the DST operand is usually index
+          // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
+ assert(C);
+
+ if (C->getZExtValue() != 0) {
+ // This instruction is already using an immediate.
+ continue;
+ }
+
+ // Set the immediate value
+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
+ }
+ }
+ // Set the immediate register
+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
+
+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
+ }
+ break;
+ }
+ }
+ SDNode *Result = SelectCode(N);
+
+ // Fold operands of selected node
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && Result->isMachineOpcode() &&
+ !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
+ && TII->isALUInstr(Result->getMachineOpcode())) {
+      // Fold FNEG/FABS/CONST_ADDRESS.
+      // TODO: Isel can generate multiple MachineInstrs; we need to process
+      // Result recursively to handle them all.
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+        for (SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+
+      // If the node has a single use which is CLAMP_R600, fold it.
+ if (Result->hasOneUse() && Result->isMachineOpcode()) {
+ SDNode *PotentialClamp = *Result->use_begin();
+ if (PotentialClamp->isMachineOpcode() &&
+ PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
+ unsigned ClampIdx =
+ TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Result->getNumOperands();
+ for (unsigned i = 0; i < NumOp; ++i) {
+ Ops.push_back(Result->getOperand(i));
+ }
+ Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ Result = CurDAG->SelectNodeTo(PotentialClamp,
+ Result->getMachineOpcode(), PotentialClamp->getVTList(),
+ Ops.data(), NumOp);
+ }
+ }
+ }
+ }
+
+ return Result;
+}
+
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
+ -1
+ };
+
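+  // For each source operand, try to fold a CONST_ADDRESS read into the
+  // ALU_CONST register and its select field, an FNEG/FABS into the operand's
+  // neg/abs modifier bit, or strip a no-op BITCAST.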
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue Operand = Ops[OperandIdx[i] - 1];
+ switch (Operand.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ SDValue CstOffset;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+      // Gather the other constant values.
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
+ }
+ case ISD::FNEG:
+ if (NegIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::FABS:
+ if (AbsIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::BITCAST:
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
+ if (!V) {
+ return NULL;
+ }
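+  // Walk breadth-first over the operands of V until a base object (an
+  // Argument, GlobalVariable or AllocaInst) is found.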
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ assert(!"Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+  if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
+    return true;
+  }
+  // A private-address load of a global value (or of something based on one)
+  // can also be treated as a constant load. Check the memory operand before
+  // dereferencing it.
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO || !MMO->getValue()) {
+    return false;
+  }
+  const Value *V = MMO->getValue();
+  const Value *BV = getBasePointerValue(V);
+  if (dyn_cast<GlobalValue>(V) || (BV && dyn_cast<GlobalValue>(BV))) {
+    return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
+  }
+  return false;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+///==== AMDGPU Functions ====///
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!dyn_cast<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode * IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+
+ return true;
+}
+
+void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
+  // Go over all selected nodes and try to fold them a bit more.
+  const AMDGPUTargetLowering &Lowering =
+      static_cast<const AMDGPUTargetLowering &>(TLI);
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I) {
+    MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
+ if (!Node)
+ continue;
+
+ SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
+ if (ResNode != Node)
+ ReplaceUses(Node, ResNode);
+ }
+}
+
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 000000000000..922cac12b98e
--- /dev/null
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,647 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+ int types[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] = {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] = {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t NumTypes = sizeof(types) / sizeof(*types);
+ size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < NumTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // The GPU also does not have a divrem instruction for signed or unsigned.
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+    // We support loading/storing v2f64 but not operations on the type.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+  // TODO: Fix the UDIV24 algorithm so it works correctly for these
+  // types; it needs vector comparisons to do so.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+ // Use the default implementation.
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::Constant, MVT::i32, Legal);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const {
+ return false;
+}
+
+// The backend supports 32- and 64-bit floating point immediates.
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
+}
+
+
+// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
+// be zero or one for a target-specific node. Used by the DAG combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::SELECT_CC:
+    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
+    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    // A bit is known only if it is known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
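+  // Sign extend in-register: shift left so the sign bit of the narrow type
+  // lands in the MSB, then arithmetic shift right by the same amount.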
+ if (srcBits < 32) {
+    // If the op is less than 32 bits, it needs to be extended to 32 bits
+    // so the shifts below keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
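+  // Divide the total bit width by the element width (64 or 32) to get the
+  // element count of the equivalent i32/i64 (vector) type.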
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
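+
+  // Compute an approximate quotient in f32 and correct it by jq, the sign
+  // of ia^ib (i.e. +/-1), whenever the rounded-back remainder |fr| reaches
+  // |fb|.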
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+      DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
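+  //
+  // i.e. take the absolute value of both operands (add the sign mask, then
+  // xor with it), divide unsigned, and apply the quotient's sign (the xor
+  // of the two sign masks) the same way.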
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
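+
+// The add/xor pairs above are the classic branch-free absolute value: with a
+// sign mask m = (x < 0 ? -1 : 0), (x + m) ^ m == |x|. A rough C equivalent
+// of the whole sequence (a sketch, not the emitted code):
+//
+//   int sdiv32(int a, int b) {
+//     int sa = a >> 31, sb = b >> 31;  // -1 if negative, else 0
+//     unsigned ua = (a + sa) ^ sa;     // |a|
+//     unsigned ub = (b + sb) ^ sb;     // |b|
+//     unsigned uq = ua / ub;
+//     int sq = sa ^ sb;                // sign of the quotient
+//     return (uq + sq) ^ sq;           // apply the sign
+//   }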
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+  // Lowering for 64-bit signed division is not implemented yet; return the
+  // node unchanged.
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
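+
+// Both LowerSREM8 and LowerSREM16 use the same widen/operate/narrow shape;
+// in C terms (a sketch for the i8 case):
+//
+//   signed char srem8(signed char a, signed char b) {
+//     int wa = (int)a;                 // sign-extend to 32 bits
+//     int wb = (int)b;
+//     return (signed char)(wa % wb);   // 32-bit srem, truncate back
+//   }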
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates code equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
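+
+// In C terms, the sequence reduces signed remainder to unsigned arithmetic
+// on absolute values and then restores the dividend's sign (a sketch):
+//
+//   int srem32(int a, int b) {
+//     int sa = a >> 31, sb = b >> 31;      // sign masks
+//     unsigned ua = (a + sa) ^ sa;         // |a|
+//     unsigned ub = (b + sb) ^ sb;         // |b|
+//     unsigned ur = ua - (ua / ub) * ub;   // unsigned remainder
+//     return (ur + sa) ^ sa;               // remainder takes a's sign
+//   }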
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+  // Lowering for 64-bit signed remainder is not implemented yet; return the
+  // node unchanged.
+  return SDValue(Op.getNode(), 0);
+}
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
new file mode 100644
index 000000000000..110f1476513b
--- /dev/null
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -0,0 +1,207 @@
+//===----------- AMDILInstrInfo.td - AMDIL Target -------*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports doubles, but not
+// double precision divide, in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24-bit signed
+// math ops. Otherwise a software expansion to 32-bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicates that are true if 64-bit operations are supported in hardware
+// or in software, respectively
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
+// Predicate that is true if the device is at least the Evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
+
+// Predicate that is false if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
+
+
+// Predicates selecting whether 64-bit Mul is supported natively in the IL
+// or must be expanded in software
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDGPUDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicates selecting whether 64-bit Div/Mod is supported natively in the
+// IL or must be expanded in software
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+
+// Predicates that are true if 64-bit or 32-bit pointers are used,
+// respectively.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
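+
+// Note that each predicate above is assembled from adjacent string literals
+// into one C++ expression that is evaluated at instruction-selection time;
+// HasHWDDiv, for example, expands to:
+//
+//   Subtarget.device()->getGeneration() > AMDGPUDeviceInfo::HD4XXX &&
+//   Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)
+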
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Conditional branch where the first operand is the target basic block and
+// the second is the condition value
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+// Multiclass that handles branch instructions
+multiclass BranchConditional<SDNode Op> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+}
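+
+// A hypothetical instantiation such as
+//
+//   defm BRANCH_COND : BranchConditional<IL_brcond>;
+//
+// would produce the records BRANCH_COND_i32 and BRANCH_COND_f32, giving each
+// scalar register class its own pseudo branch with the matching selection
+// pattern.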
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp
new file mode 100644
index 000000000000..4ddb057d80a7
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,79 @@
+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDIL.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {
+}
+
+std::string
+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const {
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+  if (IntrID < Intrinsic::num_intrinsics) {
+    // Not an AMDGPU intrinsic; returning a null char* here would be
+    // undefined behavior in the std::string constructor, so return an
+    // empty name instead.
+    return "";
+  }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned int
+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID
+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
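+
+// A minimal usage sketch (hypothetical caller), mapping a builtin name to an
+// intrinsic ID and back to its "llvm.AMDIL.*" name string:
+//
+//   const TargetIntrinsicInfo *TII = TM->getIntrinsicInfo();
+//   unsigned ID = TII->lookupName("__amdil_abs", strlen("__amdil_abs"));
+//   if (ID)
+//     std::string N = TII->getName(ID);  // e.g. "llvm.AMDIL.abs"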
+
+bool
+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+Function*
+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
+ llvm_unreachable("Not implemented");
+}
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h
new file mode 100644
index 000000000000..35559e23fceb
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.h
@@ -0,0 +1,49 @@
+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef AMDIL_INTRINSICS_H
+#define AMDIL_INTRINSICS_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+class TargetMachine;
+
+namespace AMDGPUIntrinsic {
+enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+};
+
+} // end namespace AMDGPUIntrinsic
+
+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+public:
+ AMDGPUIntrinsicInfo(TargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+};
+
+} // end namespace llvm
+
+#endif // AMDIL_INTRINSICS_H
+
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
new file mode 100644
index 000000000000..6ec3559af24c
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -0,0 +1,232 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the AMDIL-specific intrinsics.
+//
+//===---------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the profile classes in AMDILInstrInfo.td, but for
+// target-specific intrinsics instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
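+
+// Each class above fixes an intrinsic's signature and memory behavior; for
+// example, a BinaryIntInt intrinsic takes two operands of one (any-width)
+// integer type, returns that same type, and is IntrNoMem, so such calls can
+// be freely CSE'd and reordered. A sketch of a source-level mapping via the
+// GCCBuiltin names declared below:
+//
+//   int h = __amdil_imul_high(a, b);   // recognized as int_AMDIL_mulhi_i32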
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+}
diff --git a/lib/Target/R600/AMDILNIDevice.cpp b/lib/Target/R600/AMDILNIDevice.cpp
new file mode 100644
index 000000000000..47c3f7f209d6
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.cpp
@@ -0,0 +1,65 @@
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ DeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ DeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ DeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ DeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+AMDGPUNIDevice::~AMDGPUNIDevice() {
+}
+
+size_t
+AMDGPUNIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUNIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD6XXX;
+}
+
+
+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
+ : AMDGPUNIDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
+}
+
+void
+AMDGPUCaymanDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+}
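+
+// A sketch of how these capability bits are consumed elsewhere in the
+// backend (hypothetical call site):
+//
+//   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+//   if (ST.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
+//     // Cayman with the DoubleOps override: emit native double-precision.
+//   } else {
+//     // Fall back to a software expansion.
+//   }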
+
diff --git a/lib/Target/R600/AMDILNIDevice.h b/lib/Target/R600/AMDILNIDevice.h
new file mode 100644
index 000000000000..24a640845eab
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.h
@@ -0,0 +1,57 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILNIDEVICE_H
+#define AMDILNIDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUNIDevice is the base class for all Northern Islands
+/// series cards.
+///
+/// It is very similar to the AMDGPUEvergreenDevice, with the major
+/// exceptions being differences in wavefront size and hardware capabilities.
+/// The NI devices all use 64-wide wavefronts and also add support for signed
+/// 24-bit integer operations.
+class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUNIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+};
+
+/// Just as the AMDGPUCypressDevice is the double-capable version of the
+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double-capable version
+/// of the AMDGPUNIDevice. The other major difference is that the Cayman device
+/// has 4-wide ALUs, whereas the rest of the NI family has 5-wide ALUs.
+class AMDGPUCaymanDevice: public AMDGPUNIDevice {
+public:
+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUCaymanDevice();
+private:
+ virtual void setCaps();
+};
+
+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // AMDILNIDEVICE_H
diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 000000000000..3a28038666f7
--- /dev/null
+++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1215 @@
+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILDevices.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+                              "assignments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The peephole optimization pass is used to do simple last-minute
+// optimizations that are required for correct code or to remove redundant
+// functions.
+namespace {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+  // Quick check to see if we need to dump all of the pointers into the
+  // arena. If so, we mark all pointers as living in the arena. This is a
+  // workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+  // Because we don't want to invalidate any pointers while in
+  // safeNestedForEach, we push atomic conversions to a vector and handle
+  // them later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+ // Because __amdil_is_constant cannot be properly evaluated if
+  // optimizations are disabled, the calls are placed in a vector
+  // and evaluated after the __amdil_image* functions are evaluated,
+ // which should allow the __amdil_is_constant function to be
+ // evaluated correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on version of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+  // On 7XX and 8XX hardware, we do not have 24-bit signed operations, so in
+  // that case we need to expand them. These functions check for 24-bit
+  // functions and then expand them.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that if the required workgroup size is
+ // specified then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+  // On Northern Islands cards, the division is slightly less accurate than
+  // on previous generations, so we need to utilize a more accurate division.
+  // On all other cards we can translate the accurate divide to a normal
+  // divide.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no opt mode, then we need to make sure that
+ // local samplers are properly propagated as constant propagation
+ // doesn't occur and we need to know the value of kernel defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ // Helper functions
+
+ // Group of functions that recursively calculate the size of a structure based
+  // on its sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+  size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDGPUSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) {
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
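+
+// The functor contract above mirrors its use in runOnFunction below: return
+// true when the functor has already advanced (or erased) the instruction the
+// inner iterator points at, and false to let the loop advance it. A minimal
+// conforming functor (a sketch):
+//
+//   struct EraseCalls {
+//     bool operator()(BasicBlock::iterator *I) {
+//       Instruction *inst = *(*I);
+//       if (CallInst *CI = dyn_cast<CallInst>(inst)) {
+//         ++(*I);                 // step past the call first
+//         CI->eraseFromParent();  // then erase it safely
+//         return true;            // iterator already advanced
+//       }
+//       return false;             // caller advances the iterator
+//     }
+//   };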
+
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDGPUPeepholeOpt(TargetMachine &tm) {
+ return new AMDGPUPeepholeOpt(tm);
+ }
+} // llvm namespace
+
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
+ : FunctionPass(ID), TM(tm) {
+ mDebug = DEBUGME;
+ optLevel = TM.getOptLevel();
+
+}
+
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
+}
+
+const char *
+AMDGPUPeepholeOpt::getPassName() const {
+ return "AMDGPU PeepHole Optimization Pass";
+}
+
+static bool
+containsPointerType(Type *Ty) {
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+ };
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+ if (!dyn_cast<StructType>(DereferencedType)
+ ) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+ // another pointer we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+ atomicFuncs[x].first->setOperand(
+ atomicFuncs[x].first->getNumOperands()-1,
+ atomicFuncs[x].second);
+
+ }
+ mChanged = true;
+ if (mConvertAtomics) {
+ return;
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+  mCTX = &MF.getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+ // If we do not have optimizations, then this
+ // cannot be properly evaluated, so we add the
+ // call instruction to a vector and process
+ // them at the end of processing after the
+ // samplers have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+ atomicFuncs.push_back(std::make_pair(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
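+
+// Example of the atomic rewrite above (a sketch): a call whose result is
+// unused, e.g.
+//
+//   __atom_add(p, 1);        // result ignored
+//
+// is redirected to a "_noret" variant with the same signature,
+//
+//   __atom_add_noret(p, 1);
+//
+// so the backend does not have to materialize the old value.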
+
+bool
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift) {
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+    // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
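+
+// In C terms, the shapes this helper recognizes on 'base' are (a sketch;
+// src, mask and shift are the recovered pieces, any one of which may be
+// absent):
+//
+//   base = src & mask;             // andOp case: mask found, stop here
+//   base = src << shift;           // shift found, look one level deeper
+//   base = (src & mask) << shift;  // both pieces recovered
+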
+bool
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do an optimization on a sequence of ops that in the end equals a
+ // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+  // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ return false;
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ return false;
+ }
+    // Either way the source must be shifted down so the field starts at
+    // bit 0.
+    if (!LHSShift || lhsShiftVal != lhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", LHS);
+    }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ return false;
+ }
+    // Either way the source must be shifted down so the field starts at
+    // bit 0.
+    if (!RHSShift || rhsShiftVal != rhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", RHS);
+    }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+  // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
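+
+// A concrete scalar example of the rewrite (a sketch; the semantics of
+// __amdil_ubit_insert(width, offset, src, dst) are assumed from the AMDIL
+// spec: write the low 'width' bits of src into dst at 'offset'):
+//
+//   r = (A & 0x0000FFFF) | (B & 0xFFFF0000);
+//
+// becomes, in effect,
+//
+//   r = __amdil_ubit_insert_u32(16, 16, B >> 16, A & 0x0000FFFF);
+//
+// where the B >> 16 is the "MaskShr" shift created above so that the
+// inserted field starts at bit 0 of the source.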
+
+bool
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+  // We want to do some simple optimizations on shift-right/And patterns. The
+  // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B
+  // is a value smaller than 32 and C is a constant mask, into a bit-extract
+  // call. For signed integers, it turns into the function call
+  // dst = __amdil_ibit_extract(log2(C), B, A); for unsigned integers, it
+  // turns into the function call dst = __amdil_ubit_extract(log2(C), B, A).
+  // The __amdil_[u|i]bit_extract functions can be found in Section 7.9 of
+  // the ATI IL spec of the Stream SDK for Evergreen hardware.
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+  // If it is a shift left, then it doesn't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+  // Let's make sure that the shift value and the AND mask are constant integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+      // If the mask val is greater than the number of original bits left
+ // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ // Count the number of bits set in the mask, this is the width of the
+ // resulting bit set that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+    // If the mask val is greater than the number of original bits left then
+ // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+  // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "llvm.AMDGPU.bit.extract.u32";
+  if (isVector) {
+    // Vector variants get an element-type suffix, e.g. ".v4i32"; the scalar
+    // name needs no suffix.
+    name += ".v" + itostr(numEle) + "i32";
+  }
+  // Let's create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ ShiftInst->getOperand(0),
+ shiftValConst,
+ newMaskConst
+ };
+  // Let's create the Call with the operands.
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
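+ // Each bit of A selects between B (bit set) and C (bit clear); e.g.
+ // bfi(0xFFFF0000, b, c) takes the upper half from b and the lower from c.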
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
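+ // That is, a run of (src0 & 31) ones shifted left by (src1 & 31) bits;
+ // e.g. bfm(4, 8) == 0x00000F00.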
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
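+ // Under-aligned struct accesses get their alignment reset to 0, which in
+ // LLVM IR means "use the ABI alignment for the type".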
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+ if (size <= alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ std::string namePrefix = LHS->getName().substr(0, 14);
+ // Note: "__amdil_imul24_high" shares the 14-character prefix
+ // "__amdil_imul24", so the second comparison covers it as well.
+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24") {
+ return false;
+ }
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+ // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
+ // expand them as follows:
+ // imul24 turns into 32bit imul
+ // imad24 turns into 32bit imad
+ // imul24_high turns into 32bit imulhigh
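+ // (Illustrative note: since the operands are required to fit in 24 bits,
+ // the 32-bit expansions produce the same results.)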
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24" &&
+ LHS->getName().substr(0, 19) != "__amdil_imul24_high") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
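+ // This propagation only runs at -O0; at higher optimization levels the
+ // sampler load is expected to be folded by the regular optimizer.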
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+ unsigned funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+ unsigned samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are not loading from a global variable, we fail and return.
+ if (!gv) {
+ return false;
+ }
+
+ // If the global has no initializer, or its initializer is not a 32-bit
+ // integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+ // Now that we have the global variable initializer, let's replace
+ // all uses of the load instruction with the samplerVal and
+ // reparse the __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::doInitialization(Module &M) {
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::doFinalization(Module &M) {
+ return false;
+}
+
+void
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+ bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+ bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+ bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+ bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+ bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
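+ // Structs in the private address space are sized by their contents; any
+ // other pointer counts as 4 bytes unless the caller asked to size through
+ // it (dereferencePtr).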
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td
new file mode 100644
index 000000000000..b9d033432e8c
--- /dev/null
+++ b/lib/Target/R600/AMDILRegisterInfo.td
@@ -0,0 +1,107 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+class AMDILReg<bits<16> num, string n> : Register<n> {
+ field bits<16> Value;
+ let Value = num;
+ let Namespace = "AMDGPU";
+}
+
+// We start with a small fixed set of registers for each class before
+// expanding to more. Since the swizzle is added based on the register class,
+// we can leave it off here and just specify different registers for
+// different register classes.
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless commented in this section
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp
new file mode 100644
index 000000000000..0d1de3d11eb4
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.cpp
@@ -0,0 +1,48 @@
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+
+using namespace llvm;
+
+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+}
+AMDGPUSIDevice::~AMDGPUSIDevice() {
+}
+
+size_t
+AMDGPUSIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUSIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDGPUSIDevice::getDataLayout() const {
+ return std::string(
+ "e"
+ "-p:64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128"
+ "-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+ "-v2048:2048:2048"
+ "-n32:64"
+ );
+}
diff --git a/lib/Target/R600/AMDILSIDevice.h b/lib/Target/R600/AMDILSIDevice.h
new file mode 100644
index 000000000000..5b2cb2502211
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.h
@@ -0,0 +1,39 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILSIDEVICE_H
+#define AMDILSIDEVICE_H
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
+/// of cards.
+class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUSIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+};
+
+} // namespace llvm
+#endif // AMDILSIDEVICE_H
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
new file mode 100644
index 000000000000..8efba5846bef
--- /dev/null
+++ b/lib/Target/R600/CMakeLists.txt
@@ -0,0 +1,59 @@
+set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
+
+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+add_public_tablegen_target(AMDGPUCommonTableGen)
+
+add_llvm_target(R600CodeGen
+ AMDIL7XXDevice.cpp
+ AMDILCFGStructurizer.cpp
+ AMDILDevice.cpp
+ AMDILDeviceInfo.cpp
+ AMDILEvergreenDevice.cpp
+ AMDILIntrinsicInfo.cpp
+ AMDILISelDAGToDAG.cpp
+ AMDILISelLowering.cpp
+ AMDILNIDevice.cpp
+ AMDILPeepholeOptimizer.cpp
+ AMDILSIDevice.cpp
+ AMDGPUAsmPrinter.cpp
+ AMDGPUFrameLowering.cpp
+ AMDGPUIndirectAddressing.cpp
+ AMDGPUMCInstLower.cpp
+ AMDGPUMachineFunction.cpp
+ AMDGPUSubtarget.cpp
+ AMDGPUStructurizeCFG.cpp
+ AMDGPUTargetMachine.cpp
+ AMDGPUISelLowering.cpp
+ AMDGPUConvertToISA.cpp
+ AMDGPUInstrInfo.cpp
+ AMDGPURegisterInfo.cpp
+ R600ControlFlowFinalizer.cpp
+ R600EmitClauseMarkers.cpp
+ R600ExpandSpecialInstrs.cpp
+ R600InstrInfo.cpp
+ R600ISelLowering.cpp
+ R600MachineFunctionInfo.cpp
+ R600MachineScheduler.cpp
+ R600RegisterInfo.cpp
+ SIAnnotateControlFlow.cpp
+ SIInsertWaits.cpp
+ SIInstrInfo.cpp
+ SIISelLowering.cpp
+ SILowerControlFlow.cpp
+ SIMachineFunctionInfo.cpp
+ SIRegisterInfo.cpp
+ )
+
+add_dependencies(LLVMR600CodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 000000000000..10547a598805
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,172 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case AMDGPU::PRED_SEL_OFF: break;
+ default: O << getRegisterName(Op.getReg()); break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ O << Op.getFPImm();
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O);
+ } else {
+ assert(!"unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+
+ if (Imm == 2) {
+ O << "P0";
+ } else if (Imm == 1) {
+ O << "P20";
+ } else if (Imm == 0) {
+ O << "P10";
+ } else {
+ assert(!"Invalid interpolation parameter slot");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef Asm) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm());
+ if (Op.getImm() == 1) {
+ O << Asm;
+ }
+}
+
+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "|");
+}
+
+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ union Literal {
+ float f;
+ int32_t i;
+ } L;
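+ // An ALU literal is a raw 32-bit payload; print both the integer and the
+ // float reinterpretation since the encoding does not say which one was
+ // intended.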
+
+ L.i = MI->getOperand(OpNo).getImm();
+ O << L.i << "(" << L.f << ")";
+}
+
+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, " *");
+}
+
+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "-");
+}
+
+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default: break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "+");
+}
+
+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const char * chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
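+ // The low two bits of the select encode the channel (X/Y/Z/W); the
+ // remaining bits hold the register/constant select decoded below.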
+ int chan = sel & 3;
+ sel >>= 2;
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << "[" << sel << "]";
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+ } else if (sel >= 0) {
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << "." << chans[chan];
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 000000000000..767a7082cc2c
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,54 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTPRINTER_H
+#define AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUINSTPRINTER_H
diff --git a/lib/Target/R600/InstPrinter/CMakeLists.txt b/lib/Target/R600/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..069c55ba948e
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600AsmPrinter
+ AMDGPUInstPrinter.cpp
+ )
+
+add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/InstPrinter/LLVMBuild.txt b/lib/Target/R600/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..ec0be89f104c
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600AsmPrinter
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
+
diff --git a/lib/Target/R600/InstPrinter/Makefile b/lib/Target/R600/InstPrinter/Makefile
new file mode 100644
index 000000000000..a794cc1124ed
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+#===- lib/Target/R600/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600AsmPrinter
+
+# Hack: we need to include 'main' R600 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt
index 277620bf4e59..f2a7554e5269 100644
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ b/lib/Target/R600/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===;
+;===- ./lib/Target/R600/LLVMBuild.txt --------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -16,17 +16,17 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCTargetDesc TargetInfo
+subdirectories = InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
-name = CellSPU
+name = R600
parent = Target
has_asmprinter = 1
[component_1]
type = Library
-name = CellSPUCodeGen
-parent = CellSPU
-required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
-add_to_library_groups = CellSPU
+name = R600CodeGen
+parent = R600
+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 000000000000..98fca432670d
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ //XXX: Implement if necessary.
+ }
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ assert(!"Not implemented");
+ }
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ virtual unsigned getNumFixupKinds() const { return 0; }
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+ }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(!"Not implemented");
+ }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
+ }
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(I, Layout);
+ }
+}
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new AMDGPUAsmBackend(T);
+}
+
+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
+ raw_ostream &OS) const {
+ return new AMDGPUMCObjectWriter(OS);
+}
+
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+
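+ // The only fixup handled so far is a PC-relative branch; the target
+ // appears to be encoded in 4-byte units relative to the next instruction
+ // (hence the -4).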
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ assert(Fixup.getKind() == FK_PCRel_4);
+ *Dst = (Value - 4) / 4;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..b7cdd7c8cde9
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,83 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
+ HasSingleParameterDotFile = false;
+ WeakDefDirective = 0;
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = 0;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ LinkOnceDirective = 0;
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = ".offset";
+
+}
+
+const char*
+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
+ return 0;
+}
+
+const MCSection*
+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
+ return 0;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 000000000000..3ad0fa6824ab
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMCASMINFO_H
+#define AMDGPUMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+
+class Target;
+class StringRef;
+
+class AMDGPUMCAsmInfo : public MCAsmInfo {
+public:
+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
+ const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+};
+} // namespace llvm
+#endif // AMDGPUMCASMINFO_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 000000000000..cd3a7ce65aa5
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,40 @@
+//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCInst;
+class MCOperand;
+
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..072ee49b6311
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,113 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ } else {
+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
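+ // R600 emits raw bytecode consumed by the Mesa r600g driver rather than a
+ // standard object format, so a bare (pure) streamer is used; the RelaxAll
+ // and NoExecStack flags are ignored.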
+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+}
+
+extern "C" void LLVMInitializeR600TargetMC() {
+
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+
+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 000000000000..363a4af3f3a4
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,55 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDGPUMCTARGETDESC_H
+#define AMDGPUMCTARGETDESC_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDGPUTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif // AMDGPUMCTARGETDESC_H
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..37e714c2e7b8
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+add_llvm_library(LLVMR600Desc
+ AMDGPUAsmBackend.cpp
+ AMDGPUMCTargetDesc.cpp
+ AMDGPUMCAsmInfo.cpp
+ R600MCCodeEmitter.cpp
+ SIMCCodeEmitter.cpp
+ )
+
+add_dependencies(LLVMR600Desc AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..b1beab0bb301
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Desc
+parent = R600
+required_libraries = R600AsmPrinter R600Info MC
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/Makefile b/lib/Target/R600/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..8894a7607f4f
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 000000000000..927bcbd8305c
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,585 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This code emitter outputs bytecode that is understood by the r600g driver
+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+/// but it still needs to be run through a finalizer in order to be executed
+/// by the GPU.
+///
+/// [1] http://www.mesa3d.org/
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+private:
+
+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+ raw_ostream &OS) const;
+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
+
+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+ bool isFCOp(unsigned opcode) const;
+ bool isTexOp(unsigned opcode) const;
+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
+
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX,
+ INSTR_EXPORT,
+ INSTR_CFALU
+};
+
+enum FCInstr {
+ FC_IF_PREDICATE = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK_PREDICATE,
+ FC_CONTINUE
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (isFCOp(MI.getOpcode())){
+ EmitFCInstr(MI, OS);
+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else {
+ switch(MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(inst, OS);
+ break;
+ }
+ case AMDGPU::CONSTANT_LOAD_eg:
+ case AMDGPU::VTX_READ_PARAM_8_eg:
+ case AMDGPU::VTX_READ_PARAM_16_eg:
+ case AMDGPU::VTX_READ_PARAM_32_eg:
+ case AMDGPU::VTX_READ_PARAM_128_eg:
+ case AMDGPU::VTX_READ_GLOBAL_8_eg:
+ case AMDGPU::VTX_READ_GLOBAL_32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF : {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+
+ EmitByte(INSTR_VTX, OS);
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ break;
+ }
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V: {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
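+ // TEX_LD carries three extra offset operands, so the sampler and
+ // texture-type operand indices are shifted by three.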
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++) {
+ int SignedOffset = MI.getOperand(i + 2).getImm();
+ Offsets[i] = (SignedOffset & 0x1F);
+ }
+
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
+ } else {
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
+ }
+
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ EmitByte(INSTR_TEX, OS);
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ break;
+ }
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::R600_ExportBuf: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_EXPORT, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_CFALU, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ default:
+ EmitALUInstr(MI, Fixups, OS);
+ break;
+ }
+ }
+}
+
+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+
+ // Emit instruction type
+ EmitByte(INSTR_ALU, OS);
+
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+
+ // Pre-Evergreen (R600-family) ALUs use a different encoding for
+ // instructions with one or two source operands: move the 10-bit opcode
+ // field up by one bit, from bit 39 to bit 40.
+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
+ InstWord01 &= ~(0x3FFULL << 39);
+ InstWord01 |= ISAOpCode << 1;
+ }
+
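+ // The number of source operands is implied by the instruction class:
+ // OP3 instructions have three sources, OP2 two, and anything else one.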
+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
+
+ EmitByte(SrcNum, OS);
+
+ const unsigned SrcOps[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
+ };
+
+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
+ }
+
+ Emit(InstWord01, OS);
+ return;
+}
+
+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
+ raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
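+ // The union lets us emit the raw IEEE-754 bit pattern of a float literal
+ // without an explicit bitcast.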
+ union {
+ float f;
+ uint32_t i;
+ } Value;
+ Value.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+ // For other potential instruction operands, (e.g. constant registers) the
+ // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ EmitTwoBytes(getHWReg(reg), OS);
+ if (reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ Value.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ Value.i = ImmOp.getImm();
+ }
+ }
+ } else {
+ // XXX: Handle other operand types.
+ EmitTwoBytes(0, OS);
+ }
+
+ // Emit the source channel (1 byte)
+ if (MO.isReg()) {
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit isNegated (1 byte)
+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+ (MO.isReg() &&
+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit isAbsolute (1 byte)
+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit relative addressing mode (1 byte)
+ EmitByte(0, OS);
+
+ // Emit kc_bank; this will be adjusted later by r600_asm.
+ EmitByte(0, OS);
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(Value.i, OS);
+
+}
+
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+ unsigned SelOpIdx, raw_ostream &OS) const {
+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
+ union {
+ float f;
+ uint32_t i;
+ } InlineConstant;
+ InlineConstant.i = 0;
+ // Emit the source type (1 byte) and source select (4 bytes). For GPRs the
+ // type is 0 and the select is 0 (the GPR index is encoded in the
+ // instruction encoding). For constants the type is 1 and the select is the
+ // original const select passed from the driver.
+ unsigned Reg = RegMO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ EmitByte(1, OS);
+ uint32_t Sel = SelMO.getImm();
+ Emit(Sel, OS);
+ } else {
+ EmitByte(0, OS);
+ Emit((uint32_t)0, OS);
+ }
+
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
+ }
+ }
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(InlineConstant.i, OS);
+}
+
+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
+
+ // Emit instruction type
+ EmitByte(INSTR_FC, OS);
+
+ // Emit SRC
+ unsigned NumOperands = MI.getNumOperands();
+ if (NumOperands > 0) {
+ assert(NumOperands == 1);
+ EmitSrc(MI, 0, OS);
+ } else {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ // Emit FC Instruction
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDGPU::PREDICATED_BREAK:
+ instr = FC_BREAK_PREDICATE;
+ break;
+ case AMDGPU::CONTINUE:
+ instr = FC_CONTINUE;
+ break;
+ case AMDGPU::IF_PREDICATE_SET:
+ instr = FC_IF_PREDICATE;
+ break;
+ case AMDGPU::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDGPU::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDGPU::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDGPU::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ EmitByte(instr, OS);
+}
+
+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
+ raw_ostream &OS) const {
+
+ for (unsigned int i = 0; i < ByteCount; i++) {
+ EmitByte(0, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) (Byte & 0xff));
+}
+
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
+ raw_ostream &OS) const {
+ OS.write((uint8_t) (Bytes & 0xff));
+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 4; i++) {
+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 8; i++) {
+ EmitByte((Value >> (8 * i)) & 0xff, OS);
+ }
+}
+
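+// The hardware register encoding packs the channel (X/Y/Z/W) into the bits
+// above HW_CHAN_SHIFT and the register index into the low HW_REG_MASK bits;
+// the two helpers below extract each field.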
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ if (MO.isReg()) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ return MRI.getEncodingValue(MO.getReg());
+ } else {
+ return getHWReg(MO.getReg());
+ }
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else {
+ assert(0 && "Unhandled operand type");
+ return 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::PREDICATED_BREAK:
+ case AMDGPU::CONTINUE:
+ case AMDGPU::IF_PREDICATE_SET:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
+ unsigned Flag) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
+ if (FlagIndex == 0) {
+ return false;
+ }
+ assert(MI.getOperand(FlagIndex).isImm());
+ return !!((MI.getOperand(FlagIndex).getImm() >>
+ (NUM_MO_FLAGS * Operand)) & Flag);
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 000000000000..5af83209a0d5
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,201 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The SI code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Helper type used in encoding
+typedef union {
+ int32_t I;
+ float F;
+} IntFloatUnion;
+
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+
+ /// \brief Can this operand also contain immediate values?
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+ /// \brief Encode an fp or int literal
+ uint32_t getLitEncoding(const MCOperand &MO) const;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri) { }
+
+ ~SIMCCodeEmitter() { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
+
+ unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
+ return (AMDGPU::SSrc_32RegClassID == RegClass) ||
+ (AMDGPU::SSrc_64RegClassID == RegClass) ||
+ (AMDGPU::VSrc_32RegClassID == RegClass) ||
+ (AMDGPU::VSrc_64RegClassID == RegClass);
+}
+
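+// Returns the SI source-operand encoding for an inline constant: 128..192
+// encode the integers 0..64, 193..208 encode -1..-16, 240..247 encode the
+// common float immediates +/-0.5, +/-1.0, +/-2.0 and +/-4.0, 255 means the
+// value must be emitted as a separate 32-bit literal, and ~0 means the
+// operand is not an immediate at all.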
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+
+ IntFloatUnion Imm;
+ if (MO.isImm())
+ Imm.I = MO.getImm();
+ else if (MO.isFPImm())
+ Imm.F = MO.getFPImm();
+ else
+ return ~0;
+
+ if (Imm.I >= 0 && Imm.I <= 64)
+ return 128 + Imm.I;
+
+ if (Imm.I >= -16 && Imm.I <= -1)
+ return 192 + abs(Imm.I);
+
+ if (Imm.F == 0.5f)
+ return 240;
+
+ if (Imm.F == -0.5f)
+ return 241;
+
+ if (Imm.F == 1.0f)
+ return 242;
+
+ if (Imm.F == -1.0f)
+ return 243;
+
+ if (Imm.F == 2.0f)
+ return 244;
+
+ if (Imm.F == -2.0f)
+ return 245;
+
+ if (Imm.F == 4.0f)
+ return 246;
+
+ if (Imm.F == -4.0f)
+ return 247;
+
+ return 255;
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+
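+ // Only 32-bit encodings can be followed by a 32-bit literal; if the
+ // instruction itself is wider than 4 bytes we are done.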
+ if (bytes > 4)
+ return;
+
+ // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!isSrcOperand(Desc, i))
+ continue;
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ if (getLitEncoding(Op) != 255)
+ continue;
+
+ // Yes! Encode it
+ IntFloatUnion Imm;
+ if (Op.isImm())
+ Imm.I = Op.getImm();
+ else
+ Imm.F = Op.getFPImm();
+
+ for (unsigned j = 0; j < 4; j++) {
+ OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ }
+
+ // Only one literal value allowed
+ break;
+ }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+
+ if (MO.isExpr()) {
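+ // Symbolic operands (e.g. branch targets) are emitted as zero here and
+ // recorded as 4-byte PC-relative fixups to be resolved later.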
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // Figure out the operand number, needed for isSrcOperand check
+ unsigned OpNo = 0;
+ for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+ if (&MO == &MI.getOperand(OpNo))
+ break;
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (isSrcOperand(Desc, OpNo)) {
+ uint32_t Enc = getLitEncoding(MO);
+ if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ return Enc;
+
+ } else if (MO.isImm())
+ return MO.getImm();
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ return 0;
+}
+
diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile
new file mode 100644
index 000000000000..1b3ebbe8c8f3
--- /dev/null
+++ b/lib/Target/R600/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMR600CodeGen
+TARGET = AMDGPU
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
+ AMDGPUGenAsmWriter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
new file mode 100644
index 000000000000..868810c613b3
--- /dev/null
+++ b/lib/Target/R600/Processors.td
@@ -0,0 +1,30 @@
+//===-- Processors.td - R600 processor definitions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU processors supported.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"rv710", R600_EG_Itin, []>;
+def : Proc<"rv730", R600_EG_Itin, []>;
+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
+
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000000000000..3a6c7eac730f
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,268 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their addresses on the fly; it also sets the STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600cf"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+ AluInstCount++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
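+ // Open the fetch clause with a CF_TC instruction that records the
+ // clause's address and the number of fetch instructions it covers.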
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ }
+ void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
+ unsigned Addr) const {
+ for (std::set<MachineInstr *>::const_iterator It = MIs.begin(),
+ E = MIs.end(); It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<MachineInstr * > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
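+ // A ShaderType of 1 denotes a vertex shader here; it must start with
+ // CF_CALL_FS so the fetch shader that loads the vertex inputs runs first.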
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ DEBUG(dbgs() << CfCount << ":"; I->dump(););
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ case AMDGPU::CF_ALU:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportBuf:
+ case AMDGPU::R600_ExportSwz:
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(2);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ IfThenElseStack.push_back(MIb);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ MachineInstr * JumpInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(JumpInst, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ IfThenElseStack.push_back(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CurrentStack--;
+ MachineInstr *IfOrElseInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(IfOrElseInst, CfCount + 1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::PREDICATED_BREAK: {
+ CurrentStack--;
+ CfCount += 3;
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ .addImm(CfCount)
+ .addImm(1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::LOOP_BREAK))
+ .addImm(0);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount)
+ .addImm(1);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_CONTINUE))
+ .addImm(0);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::STACK_SIZE))
+ .addImm(MaxStack);
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
+
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
new file mode 100644
index 000000000000..16cfcf59eb3d
--- /dev/null
+++ b/lib/Target/R600/R600Defines.h
@@ -0,0 +1,97 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600DEFINES_H_
+#define R600DEFINES_H_
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define MO_FLAG_LAST (1 << 6)
+#define NUM_MO_FLAGS 7
+
+/// \brief Helper for getting the operand index for the instruction flags
+/// operand.
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6),
+ //FlagOperand bits 7, 8
+ NATIVE_OPERANDS = (1 << 9),
+ OP1 = (1 << 10),
+ OP2 = (1 << 11)
+ };
+}
+
+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
+
+/// \brief Defines for extracting register information from register encoding
+#define HW_REG_MASK 0x1ff
+#define HW_CHAN_SHIFT 9
+
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
+namespace R600Operands {
+ enum Ops {
+ DST,
+ UPDATE_EXEC_MASK,
+ UPDATE_PREDICATE,
+ WRITE,
+ OMOD,
+ DST_REL,
+ CLAMP,
+ SRC0,
+ SRC0_NEG,
+ SRC0_REL,
+ SRC0_ABS,
+ SRC0_SEL,
+ SRC1,
+ SRC1_NEG,
+ SRC1_REL,
+ SRC1_ABS,
+ SRC1_SEL,
+ SRC2,
+ SRC2_NEG,
+ SRC2_REL,
+ SRC2_SEL,
+ LAST,
+ PRED_SEL,
+ IMM,
+ COUNT
+ };
+
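+ /// \brief Maps each logical operand (R600Operands::Ops) to its machine
+ /// operand index for the OP1, OP2 and OP3 encodings; -1 means the operand
+ /// is not present in that encoding. Row N is used for instructions with
+ /// N+1 source operands.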
+ const static int ALUOpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S S S S
+// R O D L S R R R R S R R R R S R R R L P
+// D U I M R A R C C C C R C C C C R C C C A R I
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+ };
+
+}
+
+#endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000000000000..3fdc678b9ef1
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,255 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU markers. R600 ALU instructions are grouped into clauses, each
+/// of which can hold up to 128 ALU instructions; these instructions can
+/// access up to 4 prefetched lines of 16 constant registers from the
+/// constant buffers. Such ALU clauses are initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned OccupiedDwords(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return 4;
+ case AMDGPU::KILL:
+ return 0;
+ default:
+ break;
+ }
+
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return 4;
+
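+ // An ordinary ALU instruction occupies one dword in the clause, plus one
+ // extra dword for every 32-bit literal it reads.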
+ unsigned NumLiteral = 0;
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++NumLiteral;
+ }
+ return 1 + NumLiteral;
+ }
+
+ bool isALU(const MachineInstr *MI) const {
+ if (TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Returns pairs of (source operand index, const select value).
+ std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
+ const {
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+ std::vector<std::pair<unsigned, unsigned> > Result;
+
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return Result;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const));
+ }
+ }
+ return Result;
+ }
+
+ std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+ // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+ // (See also R600ISelLowering.cpp)
+ // ConstIndex value is in [0, 4095].
+ return std::pair<unsigned, unsigned>(
+ ((Sel >> 2) - 512) >> 12, // KC_BANK
+ // Line number of ConstIndex.
+ // A line contains 16 constant registers, but a KC bank can lock two
+ // lines at a time; we therefore want an even line number. A line number
+ // could be computed with (>> 4), but using (>> 5) << 1 guarantees an
+ // even result.
+ ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+ }
+
+ bool SubstituteKCacheBank(MachineInstr *MI,
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
+ assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
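+ // A clause can lock at most two kcache lines. Try to map every constant
+ // read in MI onto one of the (at most two) cached bank/line pairs and
+ // fail if a third distinct line would be needed.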
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned Sel = Consts[i].second;
+ unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+ unsigned KCacheIndex = Index * 4 + Chan;
+ const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+ if (CachedConsts.empty()) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[0] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts.size() == 1) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[1] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ return false;
+ }
+
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ switch(UsedKCache[i].first) {
+ case 0:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
+ break;
+ case 1:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
+ break;
+ default:
+ llvm_unreachable("Wrong Cache Line");
+ }
+ }
+ return true;
+ }
+
+ MachineBasicBlock::iterator
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ bool PushBeforeModifier = false;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isALU(I))
+ break;
+ if (AluInstCount > TII->getMaxAlusPerClause())
+ break;
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+ PushBeforeModifier = true;
+ AluInstCount++;
+ continue;
+ }
+ if (I->getOpcode() == AMDGPU::KILLGT) {
+ I++;
+ break;
+ }
+ if (TII->isALUInstr(I->getOpcode()) &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
+ AluInstCount += OccupiedDwords(I);
+ }
+ unsigned Opcode = PushBeforeModifier ?
+ AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+ .addImm(0) // ADDR
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+ .addImm(KCacheBanks.empty()?0:2) // KM0
+ .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+
+public:
+ R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ if (I->getOpcode() == AMDGPU::CF_ALU)
+ continue; // BB was already parsed
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+ if (isALU(I))
+ I = MakeALUClause(MBB, I);
+ else
+ ++I;
+ }
+ }
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Emit Clause Markers Pass";
+ }
+};
+
+char R600EmitClauseMarkersPass::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {
+ return new R600EmitClauseMarkersPass(TM);
+}
+
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 000000000000..f8c900f72776
--- /dev/null
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,297 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
+/// group to work correctly. This pass expands these individual instructions
+/// into several instructions that will completely fill the instruction group.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ bool ExpandInputPerspective(MachineInstr& MI);
+ bool ExpandInputConstant(MachineInstr& MI);
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+
+ switch (MI.getOpcode()) {
+ default: break;
+ // Expand PRED_X to one of the PRED_SET instructions.
+ case AMDGPU::PRED_X: {
+ uint64_t Flags = MI.getOperand(3).getImm();
+ // The native opcode used by PRED_X is stored as an immediate in the
+ // third operand.
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ MI.getOperand(2).getImm(), // opcode
+ MI.getOperand(0).getReg(), // dst
+ MI.getOperand(1).getReg(), // src0
+ AMDGPU::ZERO); // src1
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ if (Flags & MO_FLAG_PUSH) {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+ } else {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+ case AMDGPU::BREAK: {
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ AMDGPU::PRED_SETE_INT,
+ AMDGPU::PREDICATE_BIT,
+ AMDGPU::ZERO,
+ AMDGPU::ZERO);
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::PREDICATED_BREAK))
+ .addReg(AMDGPU::PREDICATE_BIT);
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_XY: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = MI.getOperand(Chan).getReg();
+ else
+ DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan >= 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_ZW: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
+ else
+ DstReg = MI.getOperand(Chan-2).getReg();
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan < 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_VEC_LOAD: {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
+ TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // T0_X = DP4 T1_X, T2_X
+ // T0_Y (write masked) = DP4 T1_Y, T2_Y
+ // T0_Z (write masked) = DP4 T1_Z, T2_Z
+ // T0_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // T0_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::DST)).getReg();
+ unsigned Src0 = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
+ if (Src1Idx != -1) {
+ Src1 = MI.getOperand(Src1Idx).getReg();
+ }
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+ // Determine the correct destination registers.
+ bool Mask = false;
+ bool NotLast = true;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+ NotLast = (Chan != 3);
+
+ // Add the new instruction
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ case AMDGPU::DOT4_r600_pseudo:
+ Opcode = AMDGPU::DOT4_r600_real;
+ break;
+ case AMDGPU::DOT4_eg_pseudo:
+ Opcode = AMDGPU::DOT4_eg_real;
+ break;
+ default:
+ break;
+ }
+
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
+
+ if (Chan != 0)
+ NewMI->bundleWithPred();
+ if (Mask) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+ if (NotLast) {
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
new file mode 100644
index 000000000000..53e6e51dd2b1
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -0,0 +1,1106 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for R600
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ computeRegisterProperties();
+
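+ // R600 has no native vector ALU operations, so vector arithmetic is
+ // expanded into per-channel scalar operations.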
+ setOperationAction(ISD::FADD, MVT::v4f32, Expand);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::v4i32, Expand);
+ setOperationAction(ISD::AND, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
+ setOperationAction(ISD::UREM, MVT::v4i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::CLAMP_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+
+ case AMDGPU::FABS_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE: {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDGPU::MOV_IMM_F32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getFPImm()->getValueAPF()
+ .bitcastToAPInt().getZExtValue());
+ break;
+ case AMDGPU::MOV_IMM_I32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+ break;
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI->getOperand(1).getImm());
+ break;
+ }
+
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::TXD: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::TXD_SHADOW: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0));
+ break;
+
+ case AMDGPU::BRANCH_COND_f32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::BRANCH_COND_i32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz: {
+ // The instruction is left unmodified if it's not the last one of its type.
+ bool isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
+ if (!EOP && !isLastInstructionOfItsType)
+ return BB;
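+ // 84 and 40 are the CF_INST values for EXPORT_DONE on Evergreen and
+ // R600 hardware respectively (hence the EG_ vs. R600_ opcode check).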
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(EOP);
+ break;
+ }
+ case AMDGPU::RETURN: {
+ // RETURN instructions must have the live-out registers as implicit uses,
+ // otherwise they appear dead.
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ MachineInstrBuilder MIB(*MF, MI);
+ for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
+ MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
+ return BB;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+using namespace llvm::Intrinsic;
+using namespace llvm::AMDGPUIntrinsic;
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ MFI->LiveOuts.push_back(Reg);
+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ }
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, MVT::i32), // SWZ_X
+ DAG.getConstant(1, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, MVT::i32) // SWZ_W
+ };
+ return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
+ Args, 8);
+ }
+
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ }
+
+ case AMDGPUIntrinsic::R600_interp_input: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ MachineSDNode *interp;
+ if (ijb < 0) {
+ interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
+ MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ return DAG.getTargetExtractSubreg(
+ TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
+ DL, MVT::f32, SDValue(interp, 0));
+ }
+
+ if (slot % 4 < 2)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+
+ return SDValue(interp, slot % 2);
+ }
+
+ case r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: return;
+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM does not seem to replace the Chain value inside
+ // CustomWidenLowerNode, so replace it explicitly here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE: {
+ SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ return;
+ }
+ }
+}
+
+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(
+ ISD::SETCC,
+ Op.getDebugLoc(),
+ MVT::i1,
+ Op, DAG.getConstantFP(0.0f, MVT::f32),
+ DAG.getCondCode(ISD::SETNE)
+ );
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL,
+ unsigned DwordOffset) const {
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+
+ // We shouldn't be using an offset wider than 16 bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
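+
+// For example, the r600_read_ngroups_y intrinsic above is lowered with
+// DwordOffset == 1, i.e. a load from byte offset 1 * 4 = 4 in the
+// PARAM_I address space.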
+
+SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
+}
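+
+// Illustrative example: with a stack width of 1, a frame index whose offset
+// (as returned by getFrameIndexOffset) is 2 lowers to the constant
+// 2 * 4 * 1 = 8; stackPtrToRegIndex() later shifts that byte address right
+// by 2, giving register index 2.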
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+ Op.getOperand(0),
+ Op.getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(32, MVT::i32),
+ Op.getOperand(1)));
+}
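+
+// The lowering above uses the identity rotl(x, n) == bitalign(x, x, 32 - n):
+// BITALIGN acts as a funnel shift right, extracting 32 bits of the 64-bit
+// concatenation of its first two operands shifted right by the third, so
+// e.g. rotl(x, 8) becomes bitalign(x, x, 24).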
+
+bool R600TargetLowering::isZero(SDValue Op) const {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ return Cst->isNullValue();
+ } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CstFP->isZero();
+ } else {
+ return false;
+ }
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // Check if we can lower this to a native operation.
+
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_any
+ // select_cc f32, f32, 1.0f, 0.0f, cc_any
+ // select_cc i32, i32, -1, 0, cc_any
+ //
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // Try to lower to a CND* instruction:
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_any
+ // select_cc f32, 0.0, i32, i32, cc_any
+ // select_cc i32, 0, f32, f32, cc_any
+ // select_cc i32, 0, i32, i32, cc_any
+ //
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ if (CompareVT != VT) {
+ // Bitcast True / False to the correct types. This will end up being
+ // a nop, but it allows us to define only a single pattern in the
+ // .TD files for each CND* instruction rather than having to have
+ // one pattern for integer True/False and one for fp True/False
+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+ }
+ if (isZero(LHS)) {
+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
+ }
+
+ switch (CCOpcode) {
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT:
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ Temp = True;
+ True = False;
+ False = Temp;
+ break;
+ default:
+ break;
+ }
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ Cond, Zero,
+ True, False,
+ DAG.getCondCode(CCOpcode));
+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
+ }
+
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ // If we make it this far, it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (CompareVT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ } else if (CompareVT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, CompareVT);
+ HWFalse = DAG.getConstant(0, CompareVT);
+ } else {
+ llvm_unreachable("Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
+ Cond, HWFalse,
+ True, False,
+ DAG.getCondCode(ISD::SETNE));
+}
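+
+// To illustrate the fallback: an unsupported
+//   select_cc f32:a, f32:b, x, y, setlt
+// with arbitrary x/y becomes
+//   cond   = select_cc a, b, 1.0f, 0.0f, setlt     (a SET* pattern)
+//   result = select_cc cond, 0.0f, x, y, setne     (a CND* pattern)
+// each of which matches a native instruction.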
+
+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getCondCode(ISD::SETNE));
+}
+
+/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
+/// convert these pointers to a register index. Each register holds
+/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+ unsigned StackWidth,
+ SelectionDAG &DAG) const {
+ unsigned SRLPad;
+ switch(StackWidth) {
+ case 1:
+ SRLPad = 2;
+ break;
+ case 2:
+ SRLPad = 3;
+ break;
+ case 4:
+ SRLPad = 4;
+ break;
+ default: llvm_unreachable("Invalid stack width");
+ }
+
+ return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, MVT::i32));
+}
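+
+// Worked example: with StackWidth == 2, each register contributes two of its
+// four 32-bit channels (8 bytes), so byte pointer 24 maps to register index
+// 24 >> 3 == 3.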
+
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+ unsigned ElemIdx,
+ unsigned &Channel,
+ unsigned &PtrIncr) const {
+ switch (StackWidth) {
+ default:
+ case 1:
+ Channel = 0;
+ if (ElemIdx > 0) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 2:
+ Channel = ElemIdx % 2;
+ if (ElemIdx == 2) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 4:
+ Channel = ElemIdx;
+ PtrIncr = 0;
+ break;
+ }
+}
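+
+// For StackWidth == 2 this yields, element by element (the callers add
+// PtrIncr to Ptr cumulatively between elements):
+//   ElemIdx 0 -> Channel 0, PtrIncr 0
+//   ElemIdx 1 -> Channel 1, PtrIncr 0
+//   ElemIdx 2 -> Channel 0, PtrIncr 1   (advance to the next register)
+//   ElemIdx 3 -> Channel 1, PtrIncr 0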
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+
+ EVT ValueVT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (ValueVT.isVector()) {
+ unsigned NumElemVT = ValueVT.getVectorNumElements();
+ EVT ElemVT = ValueVT.getVectorElementType();
+ SDValue Stores[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in store");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32));
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ } else {
+ if (ValueVT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
+// Returns 512 + (kc_bank << 12).
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_BUFFER_0:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
+}
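+
+// Assuming the CONSTANT_BUFFER_* address spaces are numbered contiguously,
+// the table above collapses to
+//   512 + 4096 * (AddressSpace - AMDGPUAS::CONSTANT_BUFFER_0)
+// i.e. 512 + (kc_bank << 12), spelled out for kc_banks 0 through 15.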
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
+ dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+ dyn_cast<ConstantSDNode>(Ptr)) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want the Const position encoded with the following formula:
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // where const_index is Ptr computed by LLVM using an alignment of 16.
+ // Thus we add ((512 + (kc_bank << 12)) * 4 + chan) * 4 here and
+ // then divide by 4 at the ISel step.
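+ // Worked example (kc_bank = 0, so ConstantBlock = 512): a constant at
+ // byte offset Ptr = 16 has const_index = 16 / 16 = 1; for channel i = 1,
+ // NewPtr = 16 + 4 * 1 + 512 * 16 = 8212, and 8212 / 4 = 2053
+ // = ((512 + 1) << 2) + 1, matching the formula above.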
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+ } else {
+ // A non-constant Ptr can't be folded; keep it as a v4i32 load.
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32),
+ Op.getOperand(2));
+ }
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32), // Channel
+ Op.getOperand(2));
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// XXX Only kernel functions are supported, so we can assume for now that
+/// every function is a kernel function, but in the future we should use
+/// separate calling conventions for kernel and non-kernel functions.
+SDValue R600TargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
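+ // The nine implicit kernel parameters occupy the first nine dwords of the
+ // vertex buffer, so explicit arguments start at byte offset 9 * 4 = 36.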
+ unsigned ParamOffsetBytes = 36;
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
+ EVT VT = Ins[i].VT;
+ Type *ArgType = FuncArg->getType();
+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
+ 32 : ArgType->getPrimitiveSizeInBits();
+ unsigned ArgBytes = ArgSizeInBits >> 3;
+ EVT ArgVT;
+ if (ArgSizeInBits < VT.getSizeInBits()) {
+ assert(!ArgType->isFloatTy() &&
+ "Extending floating point arguments not supported yet");
+ ArgVT = MVT::getIntegerVT(ArgSizeInBits);
+ } else {
+ ArgVT = VT;
+ }
+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
+ DAG.getConstant(ParamOffsetBytes, MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ ArgVT, false, false, ArgBytes);
+ InVals.push_back(Arg);
+ ParamOffsetBytes += ArgBytes;
+ }
+ return Chain;
+}
+
+EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
+ case ISD::FP_ROUND: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ Arg.getOperand(0));
+ }
+ break;
+ }
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, MVT::i32), // True
+ DAG.getConstant(0, MVT::i32), // False
+ SelectCC.getOperand(4)); // CC
+ }
+ // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
+ // also needs to be custom-combined.
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ if (Arg.getOpcode() == ISD::BITCAST &&
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
+ }
+ }
+ break;
+ }
+
+ case ISD::SELECT_CC: {
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode()) {
+ return SDValue();
+ }
+
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ }
+ }
+ }
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+ SDValue NewBldVec[4] = {
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32)
+ };
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
+ if (C->isZero()) {
+ NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+ } else if (C->isExactlyValue(1.0)) {
+ NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ }
+ DebugLoc DL = N->getDebugLoc();
+ NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ }
+ }
+ return SDValue();
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
new file mode 100644
index 000000000000..2c09acb9af30
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -0,0 +1,74 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering {
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+private:
+ const R600InstrInfo * TII;
+
+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
+ /// lowered to load instructions which retrieve the values from the Vertex
+ /// Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+ /// \brief Lower ROTL opcode to BITALIGN
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+ SelectionDAG &DAG) const;
+ void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+ unsigned &Channel, unsigned &PtrIncr) const;
+ bool isZero(SDValue Op) const;
+};
+
+} // End namespace llvm
+
+#endif // R600ISELLOWERING_H
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
new file mode 100644
index 000000000000..b232188a2641
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -0,0 +1,841 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned I = 0; I < 4; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ RI.getSubReg(DestReg, SubRegIndex),
+ RI.getSubReg(SrcReg, SubRegIndex))
+ .addReg(DestReg,
+ RegState::Define | RegState::Implicit);
+ }
+ } else {
+ // We can't copy vec4 registers.
+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
+
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ DestReg, SrcReg);
+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
+ .setIsKill(KillSrc);
+ }
+}
+
+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
+ unsigned DstReg, int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addReg(AMDGPU::ALU_LITERAL_X);
+ MIB.addImm(Imm);
+ MIB.addReg(0); // PREDICATE_BIT
+
+ return MI;
+}
+
+unsigned R600InstrInfo::getIEQOpcode() const {
+ return AMDGPU::SETE_INT;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as placeholders to emulate operations that the GPU
+// hardware performs automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT4_eg_pseudo:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert(Consts.size() <= 12 && "Too many operands in instruction group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
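+
+// As modeled here, constant sel values that differ only in bit 0 read the
+// same half of a constant pair, and an ALU group may read at most two
+// distinct halves. For example, the sels {0, 1, 2, 3} fit (only halves 0
+// and 2 are read), while {0, 2, 4} does not (three distinct halves).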
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+static
+bool isJump(unsigned Opcode) {
+ return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
+ if (LastOpc == AMDGPU::JUMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastOpc == AMDGPU::JUMP_COND) {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ }
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate!");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate!");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ // Note: we leave PRED* instructions in place;
+ // they may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const {
+ // XXX: KILL* instructions can be predicated, but they must be the last
+ // instruction in a clause, so this means any instructions after them cannot
+ // be predicated. Until we have proper support for instruction clauses in the
+ // backend, we will mark KILL* instructions as unpredicable.
+
+ if (MI->getOpcode() == AMDGPU::KILLGT) {
+ return false;
+ } else if (isVector(*MI)) {
+ return false;
+ } else {
+ return AMDGPUInstrInfo::isPredicable(MI);
+ }
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ const BranchProbability &Probability)
+ const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = 0;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ Offset = std::max(Offset,
+ GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+ }
+
+ return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+ const MachineFunction &MF) const {
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ std::vector<unsigned> Regs;
+
+ unsigned StackWidth = TFL->getStackWidth(MF);
+ int End = getIndirectIndexEnd(MF);
+
+ if (End == -1) {
+ return Regs;
+ }
+
+ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+ unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+ Regs.push_back(SuperReg);
+ for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ Regs.push_back(Reg);
+ }
+ }
+ return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ // XXX: Remove when we support a stack width > 2
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+ return &AMDGPU::TRegMemRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X, OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ AddrReg, ValueReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::DST_REL, 1);
+ return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X,
+ OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ ValueReg,
+ AddrReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+ return Mov;
+}
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+ return &AMDGPU::IndirectRegRegClass;
+}
+
+unsigned R600InstrInfo::getMaxAlusPerClause() const {
+ return 115;
+}
+
+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg) const {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
+ DstReg); // $dst
+
+ if (Src1Reg) {
+ MIB.addImm(0) // $update_exec_mask
+ .addImm(0); // $update_predicate
+ }
+ MIB.addImm(1) // $write
+ .addImm(0) // $omod
+ .addImm(0) // $dst_rel
+ .addImm(0) // $dst_clamp
+ .addReg(Src0Reg) // $src0
+ .addImm(0) // $src0_neg
+ .addImm(0) // $src0_rel
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
+
+ if (Src1Reg) {
+ MIB.addReg(Src1Reg) // $src1
+ .addImm(0) // $src1_neg
+ .addImm(0) // $src1_rel
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
+ }
+
+ // XXX: The r600g finalizer expects this to be 1; once we've moved the
+ // scheduling to the backend, we can change the default to 0.
+ MIB.addImm(1) // $last
+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addImm(0); // $literal
+
+ return MIB;
+}
+
+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const {
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
+ AMDGPU::ALU_LITERAL_X);
+ setImmOperand(MovImm, R600Operands::IMM, Imm);
+ return MovImm;
+}
+
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::Ops Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::Ops Op) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+ unsigned OpTableIdx;
+
+ if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
+ switch (Op) {
+ case R600Operands::DST: return 0;
+ case R600Operands::SRC0: return 1;
+ case R600Operands::SRC1: return 2;
+ case R600Operands::SRC2: return 3;
+ default:
+ assert(!"Unknown operand type for instruction");
+ return -1;
+ }
+ }
+
+ if (TargetFlags & R600_InstFlag::OP1) {
+ OpTableIdx = 0;
+ } else if (TargetFlags & R600_InstFlag::OP2) {
+ OpTableIdx = 1;
+ } else {
+ assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
+ "for this instruction");
+ OpTableIdx = 2;
+ }
+
+ return R600Operands::ALUOpTable[OpTableIdx][Op];
+}
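+
+// Usage sketch: for a native two-source (OP2) instruction,
+//   int SrcIdx = TII->getOperandIdx(MI, R600Operands::SRC1);
+// resolves through ALUOpTable[1][SRC1] to the machine-operand index of the
+// second source, or -1 if that operand is absent from the layout.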
+
+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
+ int64_t Imm) const {
+ int Idx = getOperandIdx(*MI, Op);
+ assert(Idx != -1 && "Operand not supported for this instruction.");
+ assert(MI->getOperand(Idx).isImm());
+ MI->getOperand(Idx).setImm(Imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ int FlagIndex = 0;
+ if (Flag != 0) {
+ // If we pass something other than the default value of Flag to this
+ // function, it means we want to set a flag on an instruction
+ // that uses native encoding.
+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
+ switch (Flag) {
+ case MO_FLAG_CLAMP:
+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
+ break;
+ case MO_FLAG_MASK:
+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
+ break;
+ case MO_FLAG_NOT_LAST:
+ case MO_FLAG_LAST:
+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
+ break;
+ case MO_FLAG_NEG:
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
+ }
+ break;
+
+ case MO_FLAG_ABS:
+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
+ "instructions.");
+ (void)IsOP3;
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
+ }
+ break;
+
+ default:
+ FlagIndex = -1;
+ break;
+ }
+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
+ } else {
+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ }
+
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (Flag == 0) {
+ return;
+ }
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ if (Flag == MO_FLAG_NOT_LAST) {
+ clearFlag(MI, Operand, MO_FLAG_LAST);
+ } else if (Flag == MO_FLAG_MASK) {
+ clearFlag(MI, Operand, Flag);
+ } else {
+ FlagOp.setImm(1);
+ }
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+ }
+}
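+
+// In the non-native case all per-operand flags are packed into one immediate,
+// with operand N occupying the bit range starting at N * NUM_MO_FLAGS.
+// Assuming the values in R600Defines.h (NUM_MO_FLAGS == 7,
+// MO_FLAG_NEG == 1 << 1), adding MO_FLAG_NEG on operand 2 ORs in
+// (1 << 1) << 14.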
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ FlagOp.setImm(0);
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+ }
+}
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
new file mode 100644
index 000000000000..dbae90013d22
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -0,0 +1,204 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ int getBranchInstr(const MachineOperand &op) const;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
+ /// \brief Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const;
+ virtual bool isMov(unsigned Opcode) const;
+
+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isPredicable(MachineInstr *MI) const;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ const BranchProbability &Probability) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const { return 1;}
+
+ /// \returns a list of all the registers that may be accessed using indirect
+ /// addressing.
+ std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+ unsigned getMaxAlusPerClause() const;
+
+ /// \brief Build a MachineInstr with all the instruction modifiers
+ /// initialized to their default values. You can use this function to avoid
+ /// manually specifying each instruction modifier operand when building a
+ /// new instruction.
+ ///
+ /// \returns a MachineInstr with all the instruction modifiers initialized
+ /// to their default values.
+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg = 0) const;
+
+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
+
+ /// \brief Get the index of Op in the MachineInstr.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+
+ /// \brief Get the index of \p Op for the given Opcode.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+
+ /// \brief Helper function for setting instruction flag values.
+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
+
+ /// \returns true if this instruction has an operand for storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+ /// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+ /// \brief Determine if the specified \p Flag is set on this \p Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+ /// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2)
+ /// \param Flag The flag being set.
+ ///
+ /// \returns the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
+ unsigned Flag = 0) const;
+
+ /// \brief Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+} // End llvm namespace
+
+#endif // R600INSTRUCTIONINFO_H_
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
new file mode 100644
index 000000000000..663b41a66d6f
--- /dev/null
+++ b/lib/Target/R600/R600Instructions.td
@@ -0,0 +1,2267 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Tablegen instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+
+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit HasNativeOperands = 0;
+
+ bits<11> op_code = inst;
+ //let Inst = inst;
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+}
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst <outs, ins, asm, pattern> {
+ field bits<64> Inst;
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+ let PrintMethod = "printMemOperand";
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+// Operands for non-registers
+
+class InstFlag<string PM = "printOperand", int Default = 0>
+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
+ let PrintMethod = PM;
+}
+
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
+def LITERAL : InstFlag<"printLiteral">;
+
+def WRITE : InstFlag <"printWrite", 1>;
+def OMOD : InstFlag <"printOMOD">;
+def REL : InstFlag <"printRel">;
+def CLAMP : InstFlag <"printClamp">;
+def NEG : InstFlag <"printNeg">;
+def ABS : InstFlag <"printAbs">;
+def UEM : InstFlag <"printUpdateExecMask">;
+def UP : InstFlag <"printUpdatePred">;
+
+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
+// Once we start using the packetizer in this backend we should have this
+// default to 0.
+def LAST : InstFlag<"printLast", 1>;
+
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+
+class R600ALU_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{12} = src0_neg;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{25} = src1_neg;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle = 0;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+// Class for instructions with only one source register.
+// If you add new ins to this instruction, make sure they are listed before
+// $literal, because the backend currently assumes that the last operand is
+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
+// and R600InstrInfo::getOperandIdx().
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+ let HasNativeOperands = 1;
+ let Op1 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+>;
+
+// If you add or change the operands for R600_2OP instructions, you must
+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let HasNativeOperands = 1;
+ let Op2 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_2OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
+ R600_Reg32:$src1))]
+>;
+
+// If you add or change the operands for R600_3OP instructions, you must
+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and
+// R600InstrInfo::getOperandIdx().
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName, "$clamp $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP3<inst>{
+
+ let HasNativeOperands = 1;
+ let DisableEncoding = "$literal";
+ let Op3 = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin>;
+
+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+ !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
+ pattern,
+ itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+
+ let COORD_TYPE_X = 0;
+ let COORD_TYPE_Y = 0;
+ let COORD_TYPE_Z = 0;
+ let COORD_TYPE_W = 0;
+ }
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
+ }]
+>;
+
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+ dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern> {
+ bits<7> RW_GPR;
+ bits<7> INDEX_GPR;
+
+ bits<2> RIM;
+ bits<2> TYPE;
+ bits<1> RW_REL;
+ bits<2> ELEM_SIZE;
+
+ bits<12> ARRAY_SIZE;
+ bits<4> COMP_MASK;
+ bits<4> BURST_COUNT;
+ bits<1> VPM;
+ bits<1> eop;
+ bits<1> MARK;
+ bits<1> BARRIER;
+
+ // CF_ALLOC_EXPORT_WORD0_RAT
+ let Inst{3-0} = rat_id;
+ let Inst{9-4} = rat_inst;
+ let Inst{10} = 0; // Reserved
+ let Inst{12-11} = RIM;
+ let Inst{14-13} = TYPE;
+ let Inst{21-15} = RW_GPR;
+ let Inst{22} = RW_REL;
+ let Inst{29-23} = INDEX_GPR;
+ let Inst{31-30} = ELEM_SIZE;
+
+ // CF_ALLOC_EXPORT_WORD1_BUF
+ let Inst{43-32} = ARRAY_SIZE;
+ let Inst{47-44} = COMP_MASK;
+ let Inst{51-48} = BURST_COUNT;
+ let Inst{52} = VPM;
+ let Inst{53} = eop;
+ let Inst{61-54} = cf_inst;
+ let Inst{62} = MARK;
+ let Inst{63} = BARRIER;
+}
+
+class LoadParamFrag <PatFrag load_type> : PatFrag <
+ (ops node:$ptr), (load_type node:$ptr),
+ [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
+>;
+
+def load_param : LoadParamFrag<load>;
+def load_param_zexti8 : LoadParamFrag<zextloadi8>;
+def load_param_zexti16 : LoadParamFrag<zextloadi16>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
+def isR700 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->getDeviceFlag()"
+ ">= OCL_DEVICE_RV710">;
+def isEG : Predicate<
+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
+
+def isCayman : Predicate<"Subtarget.device()"
+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
+ "|| Subtarget.device()->getGeneration() =="
+ "AMDGPUDeviceInfo::HD6XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
+
+//===----------------------------------------------------------------------===//
+// R600 SDNodes
+//===----------------------------------------------------------------------===//
+
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
+>;
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
+
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class ExportWord0 {
+ field bits<32> Word0;
+
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> gpr;
+ bits<2> elem_size;
+
+ let Word0{12-0} = arraybase;
+ let Word0{14-13} = type;
+ let Word0{21-15} = gpr;
+ let Word0{22} = 0; // RW_REL
+ let Word0{29-23} = 0; // INDEX_GPR
+ let Word0{31-30} = elem_size;
+}
+
+class ExportSwzWord1 {
+ field bits<32> Word1;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{2-0} = sw_x;
+ let Word1{5-3} = sw_y;
+ let Word1{8-6} = sw_z;
+ let Word1{11-9} = sw_w;
+}
+
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
+ >;
+
+}
+
+multiclass StreamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
+let usesCustomInserter = 1 in {
+
+class ExportSwzInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportSwzWord1 {
+ let elem_size = 3;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+} // End usesCustomInserter = 1
+
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0;
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_WORD0 {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+"TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+"VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+
+def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_EQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_NE))]
+>;
+
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
+
+def MOV : R600_1OP <0x19, "MOV", []>;
+
+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
+
+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+ (outs R600_Reg32:$dst),
+ (ins immType:$imm),
+ "",
+ []
+>;
+
+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 fpimm:$val)
+>;
+
+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
+
+let hasSideEffects = 1 in {
+
+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
+
+} // end hasSideEffects
+
+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SETGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+>;
+
+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+ 0x1E, "CNDGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+ 0x1D, "CNDGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+ imm:$SAMPLER_ID, imm:$textureTarget))]
+> {
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
+ "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+ i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ i32imm:$textureTarget);
+}
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SET_GRADIENTS_H : R600_TEX <
+ 0x0B, "TEX_SET_GRADIENTS_H",
+ []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+ 0x0C, "TEX_SET_GRADIENTS_V",
+ []
+>;
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ []
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ []
+>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
+ [(set (f32 R600_Reg32:$dst),
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+>;
+
+class CNDE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+class CNDGT_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+class CNDGE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+multiclass DOT4_Common <bits<11> inst> {
+
+ def _pseudo : R600_REDUCTION <inst,
+ (ins R600_Reg128:$src0, R600_Reg128:$src1),
+ "DOT4 $dst $src0, $src1",
+ [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ >;
+
+ def _real : R600_2OP <inst, "DOT4", []>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+multiclass CUBE_Common <bits<11> inst> {
+
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ > {
+ let isPseudo = 1;
+ }
+
+ def _real : R600_2OP <inst, "CUBE", []>;
+}
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "EXP_IEEE", fexp2
+>;
+
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_INT", fp_to_sint
+>;
+
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "INT_TO_FLT", sint_to_fp
+>;
+
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_UINT", fp_to_uint
+>;
+
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "UINT_TO_FLT", uint_to_fp
+>;
+
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "LOG_CLAMPED", []
+>;
+
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "LOG_IEEE", flog2
+>;
+
+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT", mulhs
+>;
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI", mulhu
+>;
+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULLO_INT", mul
+>;
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED", []
+>;
+
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
+>;
+
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIP_UINT", AMDGPUurecip
+>;
+
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+>;
+
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIPSQRT_IEEE", []
+>;
+
+class SIN_Common <bits<11> inst> : R600_1OP <
+ inst, "SIN", []>{
+ let Trig = 1;
+}
+
+class COS_Common <bits<11> inst> : R600_1OP <
+ inst, "COS", []> {
+ let Trig = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
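+// DIV_Common emulates fdiv (and the int_AMDGPU_div intrinsic) as
+// src0 * (1 / src1), using whichever RECIP_IEEE instruction the subtarget
+// provides.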
+multiclass DIV_Common <InstR600 recip_ieee> {
+def : Pat<
+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+
+def : Pat<
+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+}
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ defm DOT4_r600 : DOT4_Common<0x50>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+ def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+
+ def R600_ExportSwz : ExportSwzInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : StreamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+}
+
+// Helper patterns for normalizing inputs to trigonometric instructions on
+// R700+ cards: the hardware SIN/COS expects the angle pre-scaled by
+// 1/(2*pi), so each pattern multiplies the radian input by CONST.TWO_PI_INV.
+class COS_PAT <InstR600 trig> : Pat<
+ (fcos R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+class SIN_PAT <InstR600 trig> : Pat<
+ (fsin R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+//===----------------------------------------------------------------------===//
+// R700 Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+
+ // R700 normalizes inputs to SIN/COS the same as EG
+ def : SIN_PAT <SIN_r700>;
+ def : COS_PAT <COS_r700>;
+}
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_eg>;
+def : COS_PAT <COS_eg>;
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ defm DOT4_eg : DOT4_Common<0xBE>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
+
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ }
+
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+ }
+
+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+ // TRUNC is used for the FLT_TO_INT instructions to work around a
+ // perceived problem where the rounding modes are applied differently
+ // depending on the instruction and the slot they are in.
+ // See:
+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232
+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
+ //
+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
+ // which do not need to be truncated since the fp values are 0.0f or 1.0f.
+ // We should look into handling these cases separately.
+ def : Pat<(fp_to_sint R600_Reg32:$src0),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def : Pat<(fp_to_uint R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def EG_ExportSwz : ExportSwzInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<EG_ExportSwz, 83>;
+
+ def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : StreamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
+ let RIM = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let TYPE = 1;
+ let RW_REL = 0;
+ let ELEM_SIZE = 0;
+
+ let ARRAY_SIZE = 0;
+ let COMP_MASK = comp_mask;
+ let BURST_COUNT = 0;
+ let VPM = 0;
+ let MARK = 0;
+ let BARRIER = 1;
+}
+
+} // End usesCustomInserter = 1
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0x1, "RAT_WRITE_CACHELESS_32_eg",
+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+// 128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0xf, "RAT_WRITE_CACHELESS_128",
+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored.
+ // However, based on my testing, if USE_CONST_FIELDS is set then all of
+ // these fields need to be set to 0.
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+ // is done in R600CodeEmitter)
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ // Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Constant Loads
+// XXX: We are currently storing all constants in the global address space.
+//===----------------------------------------------------------------------===//
+
+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+>;
+
+} // End Predicates = [isEGorCayman]
+
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_cm>;
+def : COS_PAT <COS_cm>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+
+// RECIP_UINT emulation for Cayman
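+// This computes floor(2^32 / x) in floating point: 0x4f800000 is 2^32
+// encoded as an f32 literal, which scales the f32 reciprocal back into the
+// unsigned 32-bit range before the final FLT_TO_UINT.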
+def : Pat <
+ (AMDGPUurecip R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
+ (MOV_IMM_I32 0x4f800000)))
+>;
+
+
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+
+} // End isCayman
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
+ "PREDICATED_BREAK $src", []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+def PRED_X : InstR600 <
+ 0, (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "", [], NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+ >
+{
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
+
+let usesCustomInserter = 1 in {
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
+
+
+def TXD: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+} // End isPseudo = 1
+} // End usesCustomInserter = 1
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0 {
+
+  let VC_INST = 0;
+  let FETCH_TYPE = 2;
+  let FETCH_WHOLE_QUAD = 0;
+  let SRC_REL = 0;
+  let SRC_SEL_X = 0;
+  let DST_REL = 0;
+  let USE_CONST_FIELDS = 1;
+  let NUM_FORMAT_ALL = 0;
+  let FORMAT_COMP_ALL = 0;
+  let SRF_MODE_ALL = 1;
+  let MEGA_FETCH_COUNT = 16;
+  let DST_SEL_X = 0;
+  let DST_SEL_Y = 1;
+  let DST_SEL_Z = 2;
+  let DST_SEL_W = 3;
+  let DATA_FORMAT = 0;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
+
+//===--------------------------------------------------------------------===//
+// Instruction support
+//===--------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Custom inserter for branches and returns; this will eventually be a
+// separate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+let isTerminator=1 in {
+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("SWITCH", " $src"), []>;
+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("CASE", " $src"), []>;
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+  // This opcode has a custom swizzle pattern encoded in the Swizzle Encoder
+  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+// CND*_INT Patterns for f32 True / False values
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
+ R600_Reg32:$src2, cc),
+ (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
+
+// CNDGE_INT extra pattern
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
+ (i32 R600_Reg32:$src2), COND_GT),
+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
+
+// SGT Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SGE Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// The next two patterns are special cases for handling 'true if ordered' and
+// 'true if unordered' conditionals. The assumption here is that the behavior of
+// SETE and SNE conforms to the Direct3D 10 rules for floating point values
+// described here:
+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
+// We assume that SETE returns false when one of the operands is NaN and
+// SNE returns true when one of the operands is NaN.
+
+// SETE - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SNE - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SETNE_DX10 - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+def : BitConvert <v4i32, v4f32, R600_Reg128>;
+
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
+} // End isR600toCayman Predicate
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
new file mode 100644
index 000000000000..dc8980aef146
--- /dev/null
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -0,0 +1,31 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_load_texbuf :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_dummy :
+ Intrinsic<[], [llvm_i32_ty], []>;
+}
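+
+// Illustrative note: by TableGen's intrinsic naming convention, the
+// underscores above become dots in IR, so int_R600_load_input is exposed as
+// @llvm.R600.load.input. A hypothetical use from LLVM IR:
+//   %in = call float @llvm.R600.load.input(i32 4)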
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..018b40363363
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
+
+
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
new file mode 100644
index 000000000000..99c1f91b09b1
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -0,0 +1,32 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "AMDGPUMachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ SmallVector<unsigned, 4> LiveOuts;
+ std::vector<unsigned> IndirectRegs;
+};
+
+} // End llvm namespace
+
+#endif // R600MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000000000000..a777142a9e70
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,427 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch; modify it to support the TRANS slot
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+
+ DAG = dag;
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ MRI = &DAG->MRI;
+ Available[IDAlu]->clear();
+ Available[IDFetch]->clear();
+ Available[IDOther]->clear();
+ CurInstKind = IDOther;
+ CurEmitted = 0;
+ OccupedSlotsMask = 15;
+ InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
+
+
+ const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
+    InstKindLimit[IDFetch] = 7; // 8 minus 1 for safety
+ } else {
+    InstKindLimit[IDFetch] = 15; // 16 minus 1 for safety
+ }
+}
+
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+ if (QSrc->empty())
+ return;
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ QDst->push(*I);
+ }
+ QSrc->clear();
+}
+
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = 0;
+ IsTopNode = true;
+ NextInstKind = IDOther;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
+ (CurEmitted > InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind]->empty());
+ bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
+
+ if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (SU) {
+ if (CurEmitted > InstKindLimit[IDAlu])
+ CurEmitted = 0;
+ NextInstKind = IDAlu;
+ }
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << "picked node: ";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE ";
+ for (int i = 0; i < IDLast; ++i) {
+ Available[i]->dump();
+ Pending[i]->dump();
+ }
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ }
+ );
+
+ return SU;
+}
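+
+// Illustrative walk-through (values hypothetical): while emitting an ALU
+// clause, once CurEmitted exceeds InstKindLimit[IDAlu] and a FETCH or other
+// unit is available, AllowSwitchFromAlu becomes true and pickNode() opens a
+// new FETCH or other clause; until then pickAlu() keeps filling the current
+// instruction group.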
+
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+
+ DEBUG(dbgs() << "scheduled: ");
+ DEBUG(SU->dump(DAG));
+
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask = 15;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ switch (getAluKind(SU)) {
+ case AluT_XYZW:
+ CurEmitted += 4;
+ break;
+ case AluDiscarded:
+ break;
+ default: {
+ ++CurEmitted;
+ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++CurEmitted;
+ }
+ }
+ }
+ } else {
+ ++CurEmitted;
+ }
+
+
+ DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ }
+ MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ int IK = getInstKind(SU);
+
+ DEBUG(dbgs() << IK << " <= ");
+ DEBUG(SU->dump(DAG));
+
+ Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+ const TargetRegisterClass *RC) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->contains(Reg);
+ } else {
+ return MRI->getRegClass(Reg) == RC;
+ }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+ if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+      // %vregX = COPY Tn_X is likely to be discarded in favor of an
+      // assignment of Tn_X to %vregX; don't consider it in scheduling
+ return AluDiscarded;
+ }
+ else if (MI->getOperand(1).isUndef()) {
+      // MI will become a KILL; don't consider it in scheduling
+ return AluDiscarded;
+ }
+ default:
+ break;
+ }
+
+  // Does the instruction take a whole instruction group (IG)?
+  if (TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return AluT_XYZW;
+
+  // Is the result already assigned to a channel?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+  // Is the result already a member of an X/Y/Z/W class?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ return AluT_X;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ return AluT_Y;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ return AluT_Z;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ return AluT_W;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return IDAlu;
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return IDFetch;
+ default:
+ DEBUG(
+ dbgs() << "other inst: ";
+ SU->dump(DAG);
+ );
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
+ if (Q.empty())
+ return NULL;
+  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
+       E = Q.end(); It != E; ++It) {
+ SUnit *SU = *It;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
+ Q.erase(It);
+ return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
+ }
+ }
+ return NULL;
+}
+
+void R600SchedStrategy::LoadAlu() {
+ ReadyQueue *QSrc = Pending[IDAlu];
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ AluKind AK = getAluKind(*I);
+ AvailableAlus[AK].insert(*I);
+ }
+ QSrc->clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+  assert(OccupedSlotsMask && "Slot wasn't filled");
+ OccupedSlotsMask = 0;
+ InstructionsGroupCandidate.clear();
+ LoadAlu();
+}
+
+void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
+ unsigned DestReg = MI->getOperand(0).getReg();
+  // PressureRegister crashes if an operand is defined and used in the same
+  // instruction and we try to constrain its register class
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && !MO.isDef() &&
+ MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
+  // Constrain the register class of DestReg to assign it to Slot
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+}
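+
+// For example (illustrative): AssignSlot(MI, 2) constrains MI's destination
+// register to the R600_TReg32_Z class, so register allocation places the
+// result in the Z channel and the instruction can fill the group's Z slot.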
+
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ if (!UnslotedSU) {
+ return SlotedSU;
+ } else if (!SlotedSU) {
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ } else {
+    // Determine which one to pick (the lesser one)
+ if (CompareSUnit()(SlotedSU, UnslotedSU)) {
+ AvailableAlus[AluAny].insert(UnslotedSU);
+ return SlotedSU;
+ } else {
+ AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ }
+ }
+}
+
+bool R600SchedStrategy::isAvailablesAluEmpty() const {
+ return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
+ AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
+ AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
+ AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+ while (!isAvailablesAluEmpty()) {
+ if (!OccupedSlotsMask) {
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluDiscarded]);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluT_XYZW]);
+ }
+ }
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return NULL;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = 0;
+ ReadyQueue *AQ = Available[QID];
+
+ if (AQ->empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ->empty()) {
+ SU = *AQ->begin();
+ AQ->remove(AQ->begin());
+ }
+ return SU;
+}
+
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
new file mode 100644
index 000000000000..3d0367fd8ebf
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -0,0 +1,120 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class CompareSUnit {
+public:
+ bool operator()(const SUnit *S1, const SUnit *S2) {
+ return S1->getDepth() > S2->getDepth();
+ }
+};
+
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMI *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstQueue {
+ QAlu = 1,
+ QFetch = 2,
+ QOther = 4
+ };
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+ AluDiscarded, // LLVM Instructions that are going to be eliminated
+ AluLast
+ };
+
+ ReadyQueue *Available[IDLast], *Pending[IDLast];
+ std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+ int InstKindLimit[IDLast];
+
+ int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(0), TII(0), TRI(0), MRI(0) {
+ Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+ Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+ Available[IDOther] = new ReadyQueue(QOther, "AOther");
+ Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+ Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+ Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+ }
+
+ virtual ~R600SchedStrategy() {
+ for (unsigned I = 0; I < IDLast; ++I) {
+ delete Available[I];
+ delete Pending[I];
+ }
+ }
+
+ virtual void initialize(ScheduleDAGMI *dag);
+ virtual SUnit *pickNode(bool &IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU);
+
+private:
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
+
+ int getInstKind(SUnit *SU);
+ bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ bool isAvailablesAluEmpty() const;
+  SUnit *AttemptFillSlot(unsigned Slot);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
+
+ void AssignSlot(MachineInstr *MI, unsigned Slot);
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+ void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
new file mode 100644
index 000000000000..bbd7995d7d51
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -0,0 +1,99 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+ E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
+ E = AMDGPU::TRegMemRegClass.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+ std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+ for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+ E = IndirectRegs.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ case AMDGPU::GPRI32RegClassID:
+ return &AMDGPU::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ switch (Channel) {
+ default: assert(!"Invalid channel index"); return 0;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ }
+}
+
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
new file mode 100644
index 000000000000..f9ca918f246b
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -0,0 +1,55 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+namespace llvm {
+
+class R600TargetMachine;
+class TargetInstrInfo;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the R600 reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *getISARegClass(
+ const TargetRegisterClass *RC) const;
+
+ /// \brief get the HW encoding for a register's channel.
+ unsigned getHWRegChan(unsigned reg) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// \returns the sub reg enum value for the given \p Channel
+  /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+};
+
+} // End namespace llvm
+
+#endif // R600REGISTERINFO_H_
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
new file mode 100644
index 000000000000..03f49761ea40
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -0,0 +1,209 @@
+//===-- R600RegisterInfo.td - R600 Register defs -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600RegWithChan <string name, bits<9> sel, string chan> :
+ Register <name> {
+
+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
+ !if(!eq(chan, "Y"), 1,
+ !if(!eq(chan, "Z"), 2,
+ !if(!eq(chan, "W"), 3, 0))));
+ let HWEncoding{8-0} = sel;
+ let HWEncoding{10-9} = chan_encoding;
+ let Namespace = "AMDGPU";
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
+ let HWEncoding = encoding;
+}
+
+foreach Index = 0-127 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
+ def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+ Chan>;
+ }
+ // 128-bit Temporary Registers
+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y"),
+ !cast<Register>("T"#Index#"_Z"),
+ !cast<Register>("T"#Index#"_W")],
+ Index>;
+}
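+
+// For example, Index 0 above expands to the 32-bit channel registers T0_X,
+// T0_Y, T0_Z and T0_W (plus the Addr0_* and TRegMem0_* variants) and to the
+// 128-bit register T0_XYZW built from those four channels.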
+
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW",
+ [!cast<Register>("KC0_"#Index#"_X"),
+ !cast<Register>("KC0_"#Index#"_Y"),
+ !cast<Register>("KC0_"#Index#"_Z"),
+ !cast<Register>("KC0_"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW",
+ [!cast<Register>("KC1_"#Index#"_X"),
+ !cast<Register>("KC1_"#Index#"_Y"),
+ !cast<Register>("KC1_"#Index#"_Z"),
+ !cast<Register>("KC1_"#Index#"_W")],
+ Index>;
+}
+
+
+// Array Base Register holding input in FS
+foreach Index = 448-480 in {
+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
+}
+
+
+// Special Registers
+
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
+
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "ArrayBase%u", 448, 480))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
+
+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
+
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC0_X, R600_KC0_Y,
+ R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC1_X, R600_KC1_Y,
+ R600_KC1_Z, R600_KC1_W)>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_X", 0, 127), AR_X)>;
+
+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Y", 0, 127))>;
+
+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Z", 0, 127))>;
+
+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_W", 0, 127))>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_ArrayBase,
+ R600_Addr,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM
+ )>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add (sequence "T%u_XYZW", 0, 127))> {
+ let CopyCost = -1;
+}
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers. This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices =
+ [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
+}
+
+def IndirectSuperReg : IndirectSuper<"Indirect",
+ [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
+ TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
+ TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+>;
+
+def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+
+// This register class defines the registers that are the storage units for
+// the "Indirect Addressing" pseudo memory space.
+// XXX: Only use the X channel, until we support wider stack widths
+def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "TRegMem%u_X", 0, 16))
+>;
diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td
new file mode 100644
index 000000000000..7ede181c51dc
--- /dev/null
+++ b/lib/Target/R600/R600Schedule.td
@@ -0,0 +1,36 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
+// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
+// slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+
+def R600_EG_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
new file mode 100644
index 000000000000..2477e2a9dcc3
--- /dev/null
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -0,0 +1,329 @@
+//===-- SIAnnotateControlFlow.cpp - Annotate SI control flow --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Annotates the control flow with hardware specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Complex types used in this pass
+typedef std::pair<BasicBlock *, Value *> StackEntry;
+typedef SmallVector<StackEntry, 16> StackVector;
+
+// Intrinsic names the control flow is annotated with
+static const char *IfIntrinsic = "llvm.SI.if";
+static const char *ElseIntrinsic = "llvm.SI.else";
+static const char *BreakIntrinsic = "llvm.SI.break";
+static const char *IfBreakIntrinsic = "llvm.SI.if.break";
+static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
+static const char *LoopIntrinsic = "llvm.SI.loop";
+static const char *EndCfIntrinsic = "llvm.SI.end.cf";
+
+class SIAnnotateControlFlow : public FunctionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ Type *Void;
+ Type *Int64;
+ Type *ReturnStruct;
+
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+ Constant *Int64Zero;
+
+ Constant *If;
+ Constant *Else;
+ Constant *Break;
+ Constant *IfBreak;
+ Constant *ElseBreak;
+ Constant *Loop;
+ Constant *EndCf;
+
+ DominatorTree *DT;
+ StackVector Stack;
+ SSAUpdater PhiInserter;
+
+ bool isTopOfStack(BasicBlock *BB);
+
+ Value *popSaved();
+
+ void push(BasicBlock *BB, Value *Saved);
+
+ bool isElse(PHINode *Phi);
+
+ void eraseIfUnused(PHINode *Phi);
+
+ void openIf(BranchInst *Term);
+
+ void insertElse(BranchInst *Term);
+
+ void handleLoopCondition(Value *Cond);
+
+ void handleLoop(BranchInst *Term);
+
+ void closeControlFlow(BasicBlock *BB);
+
+public:
+ SIAnnotateControlFlow():
+ FunctionPass(ID) { }
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "SI annotate control flow";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char SIAnnotateControlFlow::ID = 0;
+
+/// \brief Initialize all the types and constants used in the pass
+bool SIAnnotateControlFlow::doInitialization(Module &M) {
+ LLVMContext &Context = M.getContext();
+
+ Void = Type::getVoidTy(Context);
+ Boolean = Type::getInt1Ty(Context);
+ Int64 = Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
+
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+ Int64Zero = ConstantInt::get(Int64, 0);
+
+ If = M.getOrInsertFunction(
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
+
+ Else = M.getOrInsertFunction(
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
+
+ Break = M.getOrInsertFunction(
+ BreakIntrinsic, Int64, Int64, (Type *)0);
+
+ IfBreak = M.getOrInsertFunction(
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
+
+ ElseBreak = M.getOrInsertFunction(
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
+
+ Loop = M.getOrInsertFunction(
+ LoopIntrinsic, Boolean, Int64, (Type *)0);
+
+ EndCf = M.getOrInsertFunction(
+ EndCfIntrinsic, Void, Int64, (Type *)0);
+
+ return false;
+}
+
+/// \brief Is BB the last block saved on the stack?
+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
+ return !Stack.empty() && Stack.back().first == BB;
+}
+
+/// \brief Pop the last saved value from the control flow stack
+Value *SIAnnotateControlFlow::popSaved() {
+ return Stack.pop_back_val().second;
+}
+
+/// \brief Push a BB and saved value to the control flow stack
+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
+ Stack.push_back(std::make_pair(BB, Saved));
+}
+
+/// \brief Can the condition represented by this PHI node be treated like
+/// an "Else" block?
+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ if (Phi->getIncomingBlock(i) == IDom) {
+
+ if (Phi->getIncomingValue(i) != BoolTrue)
+ return false;
+
+ } else {
+ if (Phi->getIncomingValue(i) != BoolFalse)
+ return false;
+
+ }
+ }
+ return true;
+}
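+
+// A minimal sketch of the shape isElse() accepts (IR names hypothetical):
+//
+//   %cond = phi i1 [ true, %idom ], [ false, %other.pred ]
+//
+// i.e. the PHI is true exactly on the edge from the immediate dominator of
+// its block and false on every other incoming edge.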
+
+/// \brief Erase "Phi" if it is not used any more
+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ if (!Phi->hasNUsesOrMore(1))
+ Phi->eraseFromParent();
+}
+
+/// \brief Open a new "If" block
+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
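+
+// A minimal sketch (IR names hypothetical) of what openIf() makes of
+//   br i1 %cond, label %then, label %else
+// using the llvm.SI.if declaration from doInitialization():
+//   %ret  = call { i1, i64 } @llvm.SI.if(i1 %cond)
+//   %br   = extractvalue { i1, i64 } %ret, 0
+//   %save = extractvalue { i1, i64 } %ret, 1   ; pushed for %else
+//   br i1 %br, label %then, label %else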
+
+/// \brief Close the last "If" block and open a new "Else" block
+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Recursively handle the condition leading to a loop
+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+ if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+
+    // Handle all non-constant incoming values first
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (isa<ConstantInt>(Incoming))
+ continue;
+
+ Phi->setIncomingValue(i, BoolFalse);
+ handleLoopCondition(Incoming);
+ }
+
+ BasicBlock *Parent = Phi->getParent();
+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
+
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (Incoming != BoolTrue)
+ continue;
+
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (From == IDom) {
+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
+ Value *Args[] = {
+ OldEnd->getArgOperand(0),
+ PhiInserter.GetValueAtEndOfBlock(Parent)
+ };
+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+ continue;
+ }
+ }
+
+ TerminatorInst *Insert = From->getTerminator();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
+ Value *Ret = CallInst::Create(Break, Arg, "", Insert);
+ PhiInserter.AddAvailableValue(From, Ret);
+ }
+ eraseIfUnused(Phi);
+
+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ BasicBlock *Parent = Inst->getParent();
+ TerminatorInst *Insert = Parent->getTerminator();
+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+
+ } else {
+ assert(0 && "Unhandled loop condition!");
+ }
+}
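+
+// A minimal sketch (IR names hypothetical): for a plain instruction condition
+// %cond in block %bb, the else-if branch above inserts before %bb's terminator
+//   %new = call i64 @llvm.SI.if.break(i1 %cond, i64 %broken.in)
+// and registers %new with the SSAUpdater as %bb's outgoing "broken" value.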
+
+/// \brief Handle a back edge (loop)
+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *Target = Term->getSuccessor(1);
+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+
+ PhiInserter.Initialize(Int64, "");
+ PhiInserter.AddAvailableValue(Target, Broken);
+
+ Value *Cond = Term->getCondition();
+ Term->setCondition(BoolTrue);
+ handleLoopCondition(Cond);
+
+ BasicBlock *BB = Term->getParent();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
+ PI != PE; ++PI) {
+
+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ }
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ push(Term->getSuccessor(0), Arg);
+}
+
+/// \brief Close the last opened control flow
+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+}
+
+/// \brief Annotate the control flow with intrinsics so the backend can
+/// recognize if/then/else and loops.
+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+
+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
+
+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+
+ if (!Term || Term->isUnconditional()) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ continue;
+ }
+
+ if (I.nodeVisited(Term->getSuccessor(1))) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ handleLoop(Term);
+ continue;
+ }
+
+ if (isTopOfStack(*I)) {
+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ insertElse(Term);
+ eraseIfUnused(Phi);
+ continue;
+ }
+ closeControlFlow(*I);
+ }
+ openIf(Term);
+ }
+
+ assert(Stack.empty());
+ return true;
+}
+
+/// \brief Create the annotation pass
+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
+ return new SIAnnotateControlFlow();
+}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
new file mode 100644
index 000000000000..6f0c30761506
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -0,0 +1,670 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for SI
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDGPU.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
+ TRI(TM.getRegisterInfo()) {
+
+ addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
+ addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
+
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+
+ addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
+
+ addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setTargetDAGCombine(ISD::SETCC);
+
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(CallConv == CallingConv::C);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ uint32_t Skipped = 0;
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+
+ // First check if it's a PS input addr
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+        // We can safely skip PS inputs
+ Skipped |= 1 << i;
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+ // Second split vertices into their elements
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+      // three- or five-element vertex only needs three or five registers,
+      // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
+ }
+
+ } else {
+ Splits.push_back(Arg);
+ }
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
+ AnalyzeFormalArguments(CCInfo, Splits);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+
+ if (Skipped & (1 << i)) {
+ InVals.push_back(SDValue());
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ MVT VT = VA.getLocVT();
+
+ if (VT == MVT::i64) {
+ // For now assume it is a pointer
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
+ &AMDGPU::SReg_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ const ISD::InputArg &Arg = Ins[i];
+ if (Arg.VT.isVector()) {
+
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+ Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ for (unsigned j = 0; j != NumElements; ++j)
+ Regs.push_back(DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
+ Regs.data(), Regs.size()));
+ continue;
+ }
+
+ InVals.push_back(Val);
+ }
+ return Chain;
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::BRANCH: return BB;
+ }
+ return BB;
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// \brief Helper function for LowerBRCOND
+static SDNode *findUser(SDValue Value, unsigned Opcode) {
+
+ SDNode *Parent = Value.getNode();
+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
+ I != E; ++I) {
+
+ if (I.getUse().get() != Value)
+ continue;
+
+ if (I->getOpcode() == Opcode)
+ return *I;
+ }
+ return 0;
+}
+
+/// This transforms the control flow intrinsics to get the branch destination
+/// as the last parameter, and also switches the branch target with BR if the
+/// need arises.
+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = BRCOND.getDebugLoc();
+
+ SDNode *Intr = BRCOND.getOperand(1).getNode();
+ SDValue Target = BRCOND.getOperand(2);
+ SDNode *BR = 0;
+
+ if (Intr->getOpcode() == ISD::SETCC) {
+ // As long as we negate the condition everything is fine
+ SDNode *SetCC = Intr;
+ assert(SetCC->getConstantOperandVal(1) == 1);
+ assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
+ ISD::SETNE);
+ Intr = SetCC->getOperand(0).getNode();
+
+ } else {
+ // Get the target from BR if we don't negate the condition
+ BR = findUser(BRCOND, ISD::BR);
+ Target = BR->getOperand(1);
+ }
+
+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+
+ // Build the list of result value types
+ SmallVector<EVT, 4> Res;
+ for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
+ Res.push_back(Intr->getValueType(i));
+
+ // Operands of the new intrinsic call
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(BRCOND.getOperand(0));
+ for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
+ Ops.push_back(Intr->getOperand(i));
+ Ops.push_back(Target);
+
+ // Build the new intrinsic call
+ SDNode *Result = DAG.getNode(
+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+
+ if (BR) {
+ // Give the branch instruction our target
+ SDValue Ops[] = {
+ BR->getOperand(0),
+ BRCOND.getOperand(2)
+ };
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ }
+
+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
+
+ // Copy the intrinsic results to registers
+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
+ if (!CopyToReg)
+ continue;
+
+ Chain = DAG.getCopyToReg(
+ Chain, DL,
+ CopyToReg->getOperand(1),
+ SDValue(Result, i - 1),
+ SDValue());
+
+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
+ }
+
+ // Remove the old intrinsic from the chain
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(Intr, Intr->getNumValues() - 1),
+ Intr->getOperand(0));
+
+ return Chain;
+}
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC: {
+ ConstantSDNode *True, *False;
+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ && True->isAllOnesValue()
+ && False->isNullValue()
+ && VT == MVT::i1) {
+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(4));
+
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Arg0 = N->getOperand(0);
+ SDValue Arg1 = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ ConstantSDNode *C = NULL;
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+ if (VT == MVT::i1
+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
+ && Arg0.getOperand(0).getValueType() == MVT::i1
+ && (C = dyn_cast<ConstantSDNode>(Arg1))
+ && C->isNullValue()
+ && CCOp == ISD::SETNE) {
+ return SimplifySetCC(VT, Arg0.getOperand(0),
+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// \brief Test if RegClass is one of the VSrc classes
+static bool isVSrc(unsigned RegClass) {
+ return AMDGPU::VSrc_32RegClassID == RegClass ||
+ AMDGPU::VSrc_64RegClassID == RegClass;
+}
+
+/// \brief Test if RegClass is one of the SSrc classes
+static bool isSSrc(unsigned RegClass) {
+ return AMDGPU::SSrc_32RegClassID == RegClass ||
+ AMDGPU::SSrc_64RegClassID == RegClass;
+}
+
+/// \brief Analyze the possible immediate value Op
+///
+/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
+/// and the immediate value if it's a literal immediate
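+///
+/// For example, given the checks below, -16, 64, 0.5f and -4.0f are inline
+/// immediates, while 65 or 3.0f have to be encoded as literals.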
+int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
+
+ union {
+ int32_t I;
+ float F;
+ } Imm;
+
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+ Imm.I = Node->getSExtValue();
+ else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ Imm.F = Node->getValueAPF().convertToFloat();
+ else
+ return -1; // It isn't an immediate
+
+ if ((Imm.I >= -16 && Imm.I <= 64) ||
+ Imm.F == 0.5f || Imm.F == -0.5f ||
+ Imm.F == 1.0f || Imm.F == -1.0f ||
+ Imm.F == 2.0f || Imm.F == -2.0f ||
+ Imm.F == 4.0f || Imm.F == -4.0f)
+ return 0; // It's an inline immediate
+
+ return Imm.I; // It's a literal immediate
+}
+
+/// \brief Try to fold an immediate directly into an instruction
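+///
+/// Inline immediates are always foldable; at most one literal value (the
+/// scalar slot) can accompany an instruction, so a literal only folds while
+/// that slot is still free or already holds the same value.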
+bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const {
+
+ MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
+ if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ return false;
+
+ const SDValue &Op = Mov->getOperand(0);
+ int32_t Value = analyzeImmediate(Op.getNode());
+ if (Value == -1) {
+ // Not an immediate at all
+ return false;
+
+ } else if (Value == 0) {
+ // Inline immediates can always be folded
+ Operand = Op;
+ return true;
+
+ } else if (Value == Immediate) {
+ // The literal immediate has already been folded
+ Operand = Op;
+ return true;
+
+ } else if (!ScalarSlotUsed && !Immediate) {
+ // Fold this literal immediate
+ ScalarSlotUsed = true;
+ Immediate = Value;
+ Operand = Op;
+ return true;
+
+ }
+
+ return false;
+}
+
+/// \brief Does "Op" fit into register class "RegClass"?
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
+ unsigned RegClass) const {
+
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDNode *Node = Op.getNode();
+
+ const TargetRegisterClass *OpClass;
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
+ const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
+ int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+ if (OpClassID == -1)
+ OpClass = getRegClassFor(Op.getSimpleValueType());
+ else
+ OpClass = TRI->getRegClass(OpClassID);
+
+ } else if (Node->getOpcode() == ISD::CopyFromReg) {
+ RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
+ OpClass = MRI.getRegClass(Reg->getReg());
+
+ } else
+ return false;
+
+ return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
+}
+
+/// \brief Make sure that we don't exceed the number of allowed scalars
+void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass,
+ bool &ScalarSlotUsed) const {
+
+ // First map the operands register class to a destination class
+ if (RegClass == AMDGPU::VSrc_32RegClassID)
+ RegClass = AMDGPU::VReg_32RegClassID;
+ else if (RegClass == AMDGPU::VSrc_64RegClassID)
+ RegClass = AMDGPU::VReg_64RegClassID;
+ else
+ return;
+
+ // Nothing to do if they fit naturally
+ if (fitsRegClass(DAG, Operand, RegClass))
+ return;
+
+ // If the scalar slot isn't used yet use it now
+ if (!ScalarSlotUsed) {
+ ScalarSlotUsed = true;
+ return;
+ }
+
+ // This is a conservative approach; it is possible that we can't determine
+ // the correct register class and copy too often, but better safe than sorry.
+ SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
+ SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
+ Operand.getValueType(), Operand, RC);
+ Operand = SDValue(Node, 0);
+}
+
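+/// \brief Fold immediates and fix up operand classes after instruction
+/// selection.
+///
+/// A rough sketch of the strategy below: fold immediates into VSrc/SSrc
+/// operands, swap commutable operands where that helps, and promote the
+/// instruction to its e64 encoding when the e32 constraints can't be met.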
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ // Original encoding (either e32 or e64)
+ int Opcode = Node->getMachineOpcode();
+ const MCInstrDesc *Desc = &TII->get(Opcode);
+
+ unsigned NumDefs = Desc->getNumDefs();
+ unsigned NumOps = Desc->getNumOperands();
+
+ // Commuted opcode if available
+ int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+
+ assert(!DescRev || DescRev->getNumDefs() == NumDefs);
+ assert(!DescRev || DescRev->getNumOperands() == NumOps);
+
+ // e64 version if available, -1 otherwise
+ int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+
+ assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
+ assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
+
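+ // A 4 byte encoding (e32) leaves one literal slot free (0); the longer
+ // encodings (e64) can't carry a literal at all, hence -1.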
+ int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
+ bool HaveVSrc = false, HaveSSrc = false;
+
+ // First, figure out what we already have in this instruction
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass))
+ HaveVSrc = true;
+ else if (isSSrc(RegClass))
+ HaveSSrc = true;
+ else
+ continue;
+
+ int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
+ if (Imm != -1 && Imm != 0) {
+ // Literal immediate
+ Immediate = Imm;
+ }
+ }
+
+ // If we have neither VSrc nor SSrc, it makes no sense to continue
+ if (!HaveVSrc && !HaveSSrc)
+ return Node;
+
+ // No scalar allowed when we have both VSrc and SSrc
+ bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
+
+ // Second, go over the operands and try to fold them
+ std::vector<SDValue> Ops;
+ bool Promote2e64 = false;
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ const SDValue &Operand = Node->getOperand(i);
+ Ops.push_back(Operand);
+
+ // Already folded immediate?
+ if (isa<ConstantSDNode>(Operand.getNode()) ||
+ isa<ConstantFPSDNode>(Operand.getNode()))
+ continue;
+
+ // Is this a VSrc or SSrc operand?
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass) || isSSrc(RegClass)) {
+ // Try to fold the immediates
+ if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+ // Folding didn't work; make sure we don't hit the SReg limit
+ ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ }
+ continue;
+ }
+
+ if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
+
+ unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
+ assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
+
+ // Test if it makes sense to swap operands
+ if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[1], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+ // Swap commutable operands
+ SDValue Tmp = Ops[1];
+ Ops[1] = Ops[0];
+ Ops[0] = Tmp;
+
+ Desc = DescRev;
+ DescRev = 0;
+ continue;
+ }
+ }
+
+ if (DescE64 && !Immediate) {
+
+ // Test if it makes sense to switch to e64 encoding
+ unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
+ if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
+ continue;
+
+ int32_t TmpImm = -1;
+ if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[i], RegClass) &&
+ fitsRegClass(DAG, Ops[i], OtherRegClass))) {
+
+ // Switch to e64 encoding
+ Immediate = -1;
+ Promote2e64 = true;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
+ }
+ }
+
+ if (Promote2e64) {
+ // Add the modifier flags while promoting
+ for (unsigned i = 0; i < 4; ++i)
+ Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+ }
+
+ // Add optional chain and glue
+ for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+ // Create a complete new instruction
+ return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
+ Node->getVTList(), Ops.data(), Ops.size());
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
new file mode 100644
index 000000000000..5ad2f40f0f3a
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.h
@@ -0,0 +1,58 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering {
+ const SIInstrInfo * TII;
+ const TargetRegisterInfo * TRI;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ bool foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const;
+ bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const;
+ void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass, bool &ScalarSlotUsed) const;
+
+public:
+ SITargetLowering(TargetMachine &tm);
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+
+ int32_t analyzeImmediate(const SDNode *N) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
new file mode 100644
index 000000000000..98bd3dbb6646
--- /dev/null
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -0,0 +1,358 @@
+//===-- SIInsertWaits.cpp - Insert waits for memory operations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters
+typedef union {
+ struct {
+ unsigned VM;
+ unsigned EXP;
+ unsigned LGKM;
+ } Named;
+ unsigned Array[3];
+
+} Counters;
+
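+// One counter record per hardware register encoding value; 512 entries should
+// cover the encoding space indexed by getRegInterval() below.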
+typedef Counters RegCounters[512];
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo &TRI;
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Constant hardware limits
+ static const Counters WaitCounts;
+
+ /// \brief Constant zero value
+ static const Counters ZeroCounts;
+
+ /// \brief Counter values we have already waited on.
+ Counters WaitedOn;
+
+ /// \brief Counter values for last instruction issued.
+ Counters LastIssued;
+
+ /// \brief Registers used by async instructions.
+ RegCounters UsedRegs;
+
+ /// \brief Registers defined by async instructions.
+ RegCounters DefinedRegs;
+
+ /// \brief Different export instruction types seen since last wait.
+ unsigned ExpInstrTypesSeen;
+
+ /// \brief Get increment/decrement amount for this instruction.
+ Counters getHwCounts(MachineInstr &MI);
+
+ /// \brief Is operand relevant for async execution?
+ bool isOpRelevant(MachineOperand &Op);
+
+ /// \brief Get register interval an operand affects.
+ RegInterval getRegInterval(MachineOperand &Op);
+
+ /// \brief Handle an instruction's async components
+ void pushInstruction(MachineInstr &MI);
+
+ /// \brief Insert the actual wait instruction
+ bool insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Counts);
+
+ /// \brief Resolve all operand dependencies to counter requirements
+ Counters handleOperands(MachineInstr &MI);
+
+public:
+ SIInsertWaits(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
+ TRI(TII->getRegisterInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI insert wait instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0;
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+ return new SIInsertWaits(tm);
+}
+
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ Counters Result;
+
+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
+
+ // Only consider stores or EXP for EXP_CNT
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+
+ // LGKM may use larger values
+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
+
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ Result.Named.LGKM = 0;
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+ // Constants are always irrelevant
+ if (!Op.isReg())
+ return false;
+
+ // Defines are always relevant
+ if (Op.isDef())
+ return true;
+
+ // For exports all registers are relevant
+ MachineInstr &MI = *Op.getParent();
+ if (MI.getOpcode() == AMDGPU::EXP)
+ return true;
+
+ // For stores the stored value is also relevant
+ if (!MI.getDesc().mayStore())
+ return false;
+
+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
+
+ if (I->isReg() && I->isUse())
+ return Op.isIdenticalTo(*I);
+ }
+
+ return false;
+}
+
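+// The interval is [Encoding, Encoding + Size/4): e.g. a 128-bit register
+// occupies four consecutive 32-bit slots in the counter arrays.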
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+ if (!Op.isReg())
+ return std::make_pair(0, 0);
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+
+ assert(Size >= 4);
+
+ RegInterval Result;
+ Result.first = TRI.getEncodingValue(Reg);
+ Result.second = Result.first + Size / 4;
+
+ return Result;
+}
+
+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+
+ // Get the hardware counter increments and sum them up
+ Counters Increment = getHwCounts(MI);
+ unsigned Sum = 0;
+
+ for (unsigned i = 0; i < 3; ++i) {
+ LastIssued.Array[i] += Increment.Array[i];
+ Sum += Increment.Array[i];
+ }
+
+ // If we don't increase anything then that's it
+ if (Sum == 0)
+ return;
+
+ // Remember which export instructions we have seen
+ if (Increment.Named.EXP) {
+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ if (!isOpRelevant(Op))
+ continue;
+
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ // Remember which registers we define
+ if (Op.isDef())
+ DefinedRegs[j] = LastIssued;
+
+ // and which ones we are using
+ if (Op.isUse())
+ UsedRegs[j] = LastIssued;
+ }
+ }
+}
+
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Required) {
+
+ // End of program? No need to wait on anything
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ return false;
+
+ // Figure out if the async instructions execute in order
+ bool Ordered[3];
+
+ // VM_CNT is always ordered
+ Ordered[0] = true;
+
+ // EXP_CNT is unordered if we have both EXP & VM-writes
+ Ordered[1] = ExpInstrTypesSeen == 3;
+
+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+ Ordered[2] = false;
+
+ // The values we are going to put into the S_WAITCNT instruction
+ Counters Counts = WaitCounts;
+
+ // Do we really need to wait?
+ bool NeedWait = false;
+
+ for (unsigned i = 0; i < 3; ++i) {
+
+ if (Required.Array[i] <= WaitedOn.Array[i])
+ continue;
+
+ NeedWait = true;
+
+ if (Ordered[i]) {
+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+ // Adjust the value to the real hardware possibilities
+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+ } else
+ Counts.Array[i] = 0;
+
+ // Remember what we have waited on
+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+ }
+
+ if (!NeedWait)
+ return false;
+
+ // Reset EXP_CNT instruction types
+ if (Counts.Named.EXP == 0)
+ ExpInstrTypesSeen = 0;
+
+ // Build the wait instruction
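+ // The immediate packs the counters as VM in bits 3:0, EXP in bits 6:4 and
+ // LGKM in bits 10:8, matching the masks and shifts used here.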
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm((Counts.Named.VM & 0xF) |
+ ((Counts.Named.EXP & 0x7) << 4) |
+ ((Counts.Named.LGKM & 0x7) << 8));
+
+ return true;
+}
+
+/// \brief Helper function for handleOperands
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+ for (unsigned i = 0; i < 3; ++i)
+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
+}
+
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
+ // For each register affected by this instruction,
+ // raise the counter requirements in the result
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ if (Op.isDef()) {
+ increaseCounters(Result, UsedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+
+ if (Op.isUse())
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changes = false;
+
+ MRI = &MF.getRegInfo();
+
+ WaitedOn = ZeroCounts;
+ LastIssued = ZeroCounts;
+
+ memset(&UsedRegs, 0, sizeof(UsedRegs));
+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+ pushInstruction(*I);
+ }
+
+ // Wait for everything at the end of the MBB
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ }
+
+ return Changes;
+}
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
new file mode 100644
index 000000000000..3891ddb2dbe2
--- /dev/null
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -0,0 +1,426 @@
+//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<1> VM_CNT = 0;
+ field bits<1> EXP_CNT = 0;
+ field bits<1> LGKM_CNT = 0;
+
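+ // These bits are emitted into MCInstrDesc::TSFlags and read back by
+ // SIInsertWaits through the SIInstrFlags enum.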
+ let TSFlags{0} = VM_CNT;
+ let TSFlags{1} = EXP_CNT;
+ let TSFlags{2} = LGKM_CNT;
+}
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+ let Size = 4;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ let Size = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar operations
+//===----------------------------------------------------------------------===//
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+ let Inst{31-23} = 0x17d; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins , asm, pattern> {
+
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
+
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
+ list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<7> SBASE;
+ bits<8> OFFSET;
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = imm;
+ let Inst{14-9} = SBASE{6-1};
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+
+ let LGKM_CNT = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<7> SDST;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{14-8} = SDST;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
+
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
+
+ let neverHasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<7> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{57-53} = SSAMP{6-2};
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+}
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+
+ let EXP_CNT = 1;
+}
+
+} // End Uses = [EXEC]
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
new file mode 100644
index 000000000000..0bfcef562f04
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -0,0 +1,264 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::M0 == DestReg) {
+ // Check if M0 isn't already set to this value
+ for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+ I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+ if (!I->definesRegister(AMDGPU::M0))
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+ break;
+
+ if (!I->readsRegister(SrcReg))
+ break;
+
+ // The copy isn't necessary
+ return;
+ }
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
+ } else {
+ llvm_unreachable("Can't copy register!");
+ }
+
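+ // Copy wide registers one subregister at a time; marking DestReg as
+ // implicitly defined on all but the last copy keeps the super-register
+ // live for the whole sequence.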
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+}
+
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ return NewOpc;
+
+ return Opcode;
+}
+
+MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
+ !MI->getOperand(2).isReg())
+ return 0;
+
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+
+ return MI;
+}
+
+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addImm(Imm);
+
+ return MI;
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ return RC != &AMDGPU::EXECRegRegClass;
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+
+int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
new file mode 100644
index 000000000000..d4e60e508634
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -0,0 +1,97 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned commuteOpcode(unsigned Opcode) const;
+
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI=false) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ virtual bool isMov(unsigned Opcode) const;
+
+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+ };
+
+namespace AMDGPU {
+
+ int getVOPe64(uint16_t Opcode);
+ int getCommuteRev(uint16_t Opcode);
+ int getCommuteOrig(uint16_t Opcode);
+
+} // End namespace AMDGPU
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+ enum Flags {
+ // These flags mirror the counter bits set in TSFlags (see SIInstrFormats.td)
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+}
+
+#endif //SIINSTRINFO_H
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
new file mode 100644
index 000000000000..617f0b871c25
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -0,0 +1,356 @@
+//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// SMRD takes a 64bit memory address and can only add a 32bit offset
+def SIadd64bit32bit : SDNode<"ISD::ADD",
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
+>;
+
+// Transformation function: extract the lower 32bit of a 64bit immediate
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+}]>;
+
+// Transformation function: extract the upper 32bit of a 64bit immediate
+def HI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+}]>;
+
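+// Matches dword-aligned offsets in [0, 1020] (only bits 9-2 may be set);
+// the transform then encodes the value in dwords, i.e. Imm >> 2.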
+def IMM8bitDWORD : ImmLeaf <
+ i32, [{
+ return (Imm & ~0x3FC) == 0;
+ }], SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() >> 2, MVT::i32);
+ }]>
+>;
+
+def IMM12bit : ImmLeaf <
+ i16,
+ [{return isUInt<12>(Imm);}]
+>;
+
+class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
+ return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0;
+}]>;
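+
+// For example, (i32 -16) or (f32 1.0) match InlineImm, while a value like
+// (i32 100) needs a literal and does not (see analyzeImmediate above).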
+
+//===----------------------------------------------------------------------===//
+// SI assembler operands
+//===----------------------------------------------------------------------===//
+
+def SIOperand {
+ int ZERO = 0x80;
+ int VCC = 0x6A;
+}
+
+include "SIInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction multiclass helpers.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
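+// For example, a single VOP2_32 definition produces both an _e32 and an _e64
+// variant; the instruction mappings at the end of this file let the compiler
+// switch between the two after selection.
+//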
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Scalar classes
+//===----------------------------------------------------------------------===//
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_32:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_64:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
+ def _IMM : SMRD <
+ op, 1, (outs dstClass:$dst),
+ (ins baseClass:$sbase, i32imm:$offset),
+ asm#" $dst, $sbase, $offset", []
+ >;
+
+ def _SGPR : SMRD <
+ op, 0, (outs dstClass:$dst),
+ (ins baseClass:$sbase, SReg_32:$soff),
+ asm#" $dst, $sbase, $soff", []
+ >;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU classes
+//===----------------------------------------------------------------------===//
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
+class VOP2_REV <string revOp, bit isOrig> {
+ string RevOp = revOp;
+ bit IsOrig = isOrig;
+}
+
+multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
+ string opName, list<dag> pattern> {
+
+ def _e32 : VOP1 <
+ op, (outs drc:$dst), (ins src:$src0),
+ opName#"_e32 $dst, $src0", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs drc:$dst),
+ (ins src:$src0,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC1 = SIOperand.ZERO;
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern, string revOp> {
+ def _e32 : VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs vrc:$dst),
+ (ins arc:$src0, arc:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
+
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName> {
+
+ def _e32 : VOP2 <
+ op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3b <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+ // The VOP2 variant puts the carry out into VCC; the VOP3 variant
+ // can write it into any SGPR. We currently don't use the carry out,
+ // so for now hardcode it to VCC as well.
+ let SDST = SIOperand.VCC;
+ }
+}
+
+multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, ValueType vt, PatLeaf cond> {
+
+ def _e32 : VOPC <
+ op, (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", []
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs SReg_64:$dst),
+ (ins arc:$src0, arc:$src1,
+ InstFlag:$abs, InstFlag:$clamp,
+ InstFlag:$omod, InstFlag:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
+ !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
+ [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
+ )
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOPC_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+
+multiclass VOPC_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+//===----------------------------------------------------------------------===//
+// Vector I/O classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 0;
+}
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
+ i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+ #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector instruction mappings
+//===----------------------------------------------------------------------===//
+
+// Maps an opcode in e32 form to its e64 equivalent
+def getVOPe64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+// Maps a commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
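
TableGen turns each InstrMapping above into a constant-time opcode lookup in the generated instruction-info include. A minimal consumer sketch, assuming the generated functions follow the usual AMDGPU::get<Name>(Opcode) convention and return -1 when a row has no entry:

    // Sketch only: declarations mirror what TableGen is assumed to emit
    // from the three mappings above (-1 means "no mapped opcode").
    #include <cstdint>

    namespace AMDGPU {
    int getVOPe64(uint16_t Opcode);      // e32 encoding -> e64 encoding
    int getCommuteRev(uint16_t Opcode);  // original -> commuted opcode
    int getCommuteOrig(uint16_t Opcode); // commuted -> original opcode
    } // namespace AMDGPU

    // Prefer the VOP3 (e64) form when source modifiers are needed; keep
    // the compact e32 encoding otherwise.
    static inline int selectEncoding(uint16_t Opcode, bool NeedsModifiers) {
      if (!NeedsModifiers)
        return Opcode;
      int E64 = AMDGPU::getVOPe64(Opcode);
      return E64 == -1 ? Opcode : E64;
    }
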
+
+include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
new file mode 100644
index 000000000000..4f734f91245a
--- /dev/null
+++ b/lib/Target/R600/SIInstructions.td
@@ -0,0 +1,1607 @@
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file was originally auto-generated from a GPU register header file and
+// all the instruction definitions were originally commented out. Instructions
+// that are not yet supported remain commented out.
+//===----------------------------------------------------------------------===//
+
+class InterpSlots {
+  int P0 = 2;
+  int P10 = 0;
+  int P20 = 1;
+}
+def INTERP : InterpSlots;
+
+def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+}
+
+def isSI : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+let neverHasSideEffects = 1 in {
+
+let isMoveImm = 1 in {
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+} // End isMoveImm = 1
+
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+} // End neverHasSideEffects = 1
+
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
+
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+
+} // End hasSideEffects = 1
+
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to correctly use the S_CMP* vs V_CMP*
+instructions.
+
+When this instruction is enabled the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+ "S_CMPK_EQ_I32",
+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
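
A plausible direction for the fix described above is to expand the SCC-to-VCC copy into a conditional select during copy lowering. The sketch below is an assumption, not part of this patch; S_CSELECT_B64 reads SCC implicitly, and AllOnesReg/ZeroReg (SGPR pairs holding -1 and 0) are hypothetical inputs:

    // Hypothetical lowering of "VCC = COPY SCC": select an all-lanes mask
    // into VCC when SCC is set, an empty mask otherwise.
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    static void copySCCToVCC(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, DebugLoc DL,
                             const TargetInstrInfo *TII,
                             unsigned AllOnesReg, unsigned ZeroReg) {
      // S_CSELECT_B64 VCC, AllOnes, Zero  (SCC is an implicit use)
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B64), AMDGPU::VCC)
          .addReg(AllOnesReg)
          .addReg(ZeroReg);
    }
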
+
+let isCompare = 1 in {
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+} // End isCompare = 1
+
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+let isCompare = 1 in {
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+} // End isCompare = 1
+
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+let mayLoad = 1 in {
+
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
+
+} // mayLoad = 1
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+
+let neverHasSideEffects = 1, isMoveImm = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects = 1, isMoveImm = 1
+
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+ [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
+ [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
+>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
+ [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
+ [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
+ [(set VReg_32:$dst, (frint VSrc_32:$src0))]
+>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
+ [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
+>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
+ [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
+>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
+ [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
+ [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
+>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+ 0x0000002d, "V_RSQ_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+ 0x00000000,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+def V_INTERP_P2_F32 : VINTRP <
+ 0x00000001,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ []> {
+
+ let Constraints = "$src0 = $dst";
+ let DisableEncoding = "$src0,$m0";
+
+}
+
+def V_INTERP_MOV_F32 : VINTRP <
+ 0x00000002,
+ (outs VReg_32:$dst),
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
+ [(br bb:$target)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0 $target", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1 $target",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ $target",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ $target",
+ []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECZ $target",
+ []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECNZ $target",
+ []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+let hasSideEffects = 1 in {
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+} // End hasSideEffects
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
+ "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
+ []
+> {
+ let DisableEncoding = "$vcc";
+}
+
+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+ "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
+ [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
+ VSrc_32:$src1, VSrc_32:$src0))]
+>;
+
+// f32 pattern for V_CNDMASK_B32_e64
+def : Pat <
+ (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
+ (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
+>;
+
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
+ [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
+ [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
+} // End isCommutable = 1
+
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+
+let isCommutable = 1 in {
+
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+ 0x00000007, "V_MUL_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
+ [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+
+let isCommutable = 1 in {
+
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
+
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+
+let Uses = [VCC] in { // Carry-in comes from VCC
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+} // End Uses = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
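
The Defs/Uses annotations above encode a carry chain through VCC: V_ADD_I32 defines the carry-out, and V_ADDC_U32 consumes it as carry-in. A hedged sketch of how a 64-bit add could be assembled from the two halves (the _e32 opcode names produced by the multiclasses and the register arguments are assumptions):

    // Illustrative 64-bit add: the lo half's carry-out flows to the hi
    // half through the implicit VCC def/use pair.
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    static void emitAdd64(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, DebugLoc DL,
                          const TargetInstrInfo *TII,
                          unsigned DstLo, unsigned DstHi,
                          unsigned ALo, unsigned AHi,
                          unsigned BLo, unsigned BHi) {
      // DstLo = ALo + BLo, carry-out -> VCC (implicit, from Defs = [VCC])
      BuildMI(MBB, I, DL, TII->get(AMDGPU::V_ADD_I32_e32), DstLo)
          .addReg(ALo).addReg(BLo);
      // DstHi = AHi + BHi + VCC (implicit carry-in, from Uses = [VCC])
      BuildMI(MBB, I, DL, TII->get(AMDGPU::V_ADDC_U32_e32), DstHi)
          .addReg(AHi).addReg(BHi);
    }
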
+
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // isCommutable = 1
+
+def : Pat <
+ (mul VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhu VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhs VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+ SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+ (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
+>;
+
+def : Pat <
+ (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
+ (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def : Pat <
+ (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
+ (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+// SI Pseudo instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen (see the
+// lowering sketch after the branch pseudos below).
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
+ Uses = [EXEC], Defs = [EXEC] in {
+
+let isBranch = 1, isTerminator = 1 in {
+
+def SI_IF : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, brtarget:$target),
+ "SI_IF $dst, $vcc, $target",
+ [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
+>;
+
+def SI_ELSE : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "SI_ELSE $dst, $src, $target",
+ [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+
+ let Constraints = "$src = $dst";
+}
+
+def SI_LOOP : InstSI <
+ (outs),
+ (ins SReg_64:$saved, brtarget:$target),
+ "SI_LOOP $saved, $target",
+ [(int_SI_loop SReg_64:$saved, bb:$target)]
+>;
+
+} // end isBranch = 1, isTerminator = 1
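
A hedged sketch of the EXEC manipulation such a lowering pass might emit for SI_IF, built from the save-exec and branch instructions defined earlier in this patch; the exact sequence and the pass structure are assumptions:

    // Hypothetical SI_IF lowering: S_AND_SAVEEXEC_B64 saves the old EXEC
    // mask into $dst and restricts EXEC to the condition's lanes, then
    // S_CBRANCH_EXECZ skips the 'then' block if no lane remains active.
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    static void lowerSIIf(MachineInstr &MI, const TargetInstrInfo *TII) {
      MachineBasicBlock &MBB = *MI.getParent();
      DebugLoc DL = MI.getDebugLoc();
      unsigned SavedExec = MI.getOperand(0).getReg(); // $dst: saved mask
      unsigned Cond      = MI.getOperand(1).getReg(); // $vcc: condition

      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), SavedExec)
          .addReg(Cond);
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
          .addMBB(MI.getOperand(2).getMBB())          // $target
          .addReg(AMDGPU::EXEC);                      // $exec (encoding-disabled)
      MI.eraseFromParent();
    }
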
+
+def SI_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src),
+ "SI_ELSE $dst, $src",
+ [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+>;
+
+def SI_IF_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, SReg_64:$src),
+ "SI_IF_BREAK $dst, $vcc, $src",
+ [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
+>;
+
+def SI_ELSE_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "SI_ELSE_BREAK $dst, $src0, $src1",
+ [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+>;
+
+def SI_END_CF : InstSI <
+ (outs),
+ (ins SReg_64:$saved),
+ "SI_END_CF $saved",
+ [(int_SI_end_cf SReg_64:$saved)]
+>;
+
+def SI_KILL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+ // Uses = [EXEC], Defs = [EXEC]
+
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // end Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
+} // end isCodeGenOnly = 1, isPseudo = 1
+
+def : Pat<
+ (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+>;
+
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL (V_MOV_B32_e32 0xbf800000))
+>;
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+ VReg_32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+ 0, 0, 0)
+>;
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+ VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
+
+
+/* int_SI_sample for simple 1D texture lookup */
+def : Pat <
+ (int_SI_sample imm:$writemask, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
+ (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
+ def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+}
+
+defm : SamplePatterns<VReg_64, v2i32>;
+defm : SamplePatterns<VReg_128, v4i32>;
+defm : SamplePatterns<VReg_256, v8i32>;
+defm : SamplePatterns<VReg_512, v16i32>;
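+
+// Each instantiation above handles one coordinate-vector width, e.g. the
+// VReg_128/v4i32 variant matches sample intrinsics whose address operand
+// packs four dwords of coordinates.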
+
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+foreach Index = 0-1 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
+def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
+def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
+def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+/********** =================== **********/
+/********** Src & Dst modifiers **********/
+/********** =================== **********/
+
+def : Pat <
+ (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fabs VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fneg VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+>;
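+
+// Note: the three patterns above are all moves in disguise: V_ADD_F32_e64
+// with a zero second source copies $src, and the trailing VOP3 operands
+// (ABS, CLAMP, OMOD, NEG) apply the desired modifier to that copy.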
+
+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+ (i32 imm:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+>;
+
+def : Pat <
+ (f32 fpimm:$imm),
+ (V_MOV_B32_e32 fpimm:$imm)
+>;
+
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)
+>;
+
+def : Pat <
+ (i64 InlineImm<i64>:$imm),
+ (S_MOV_B64 InlineImm<i64>:$imm)
+>;
+
+// i64 immediates aren't supported by the hardware, so split them into two 32-bit values
+def : Pat <
+ (i64 imm:$imm),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
+ (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
+>;
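+
+// For example, a (hypothetical) constant 0x1234567800000001 would become
+// S_MOV_B32 0x00000001 into sub0 and S_MOV_B32 0x12345678 into sub1.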
+
+/********** ===================== **********/
+/********** Interpolation Patterns **********/
+/********** ===================== **********/
+
+def : Pat <
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
+ imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (EXTRACT_SUBREG VReg_64:$ij, sub1),
+ imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+
+def : Pat <
+ (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat<
+ (fdiv VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat <
+ (fcos VSrc_32:$src0),
+ (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (fsin VSrc_32:$src0),
+ (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (int_AMDGPU_cube VReg_128:$src),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub3)
+>;
+
+def : Pat <
+ (i32 (sext (i1 SReg_64:$src0))),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
+>;
+
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded into a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
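+
+// The three cases are ordered from most to least specific; selection is
+// expected to prefer the IMM8bitDWORD form whenever the offset fits and to
+// fall back to an SGPR or VGPR offset otherwise.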
+
+/********** ================== **********/
+/********** VOP3 Patterns **********/
+/********** ================== **********/
+
+def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
+ (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ 0, 0, 0, 0)>;
+
+/********** ================== **********/
+/********** SMRD Patterns **********/
+/********** ================== **********/
+
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+ // 1. Offset as an 8-bit DWORD immediate
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
+ (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+ >;
+
+ // 2. Offset loaded into a 32-bit SGPR
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
+ (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load SReg_64:$sbase),
+ (vt (Instr_IMM SReg_64:$sbase, 0))
+ >;
+}
+
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
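+
+// Note: assuming IMM8bitDWORD means "8-bit immediate counted in dwords",
+// only 4-byte aligned offsets below 1024 bytes can use the _IMM forms;
+// larger or unaligned offsets take the _SGPR path above.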
+
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
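+
+// These SI_INDIRECT_* pseudos are expanded by SILowerControlFlow
+// (IndirectSrc/IndirectDst) into V_MOVRELS_B32/V_MOVRELD_B32 sequences
+// whose index is driven through M0.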
+
+/********** =============== **********/
+/********** Conditions **********/
+/********** =============== **********/
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETO)),
+ (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+>;
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETUO)),
+ (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+>;
+
+} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
new file mode 100644
index 000000000000..0af378edfe2e
--- /dev/null
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -0,0 +1,42 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_samplel : Sample;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
+
+ /* Control flow Intrinsics */
+
+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+}
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
new file mode 100644
index 000000000000..2b60eb9fb375
--- /dev/null
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -0,0 +1,501 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers the pseudo control flow instructions to real
+/// machine instructions.
+///
+/// All control flow is handled using predicated instructions and
+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
+/// single Vector ALU). Typically, for predicates, a Vector ALU will write
+/// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
+/// per Vector ALU) and then the Scalar ALU will AND the VCC register with
+/// EXEC to update the predicates.
+///
+/// For example:
+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+/// %SGPR0 = SI_IF %VCC
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+/// %SGPR0 = SI_ELSE %SGPR0
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+/// SI_END_CF %SGPR0
+///
+/// becomes:
+///
+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
+/// // optimization which allows us to
+/// // branch if all the bits of
+/// // EXEC are zero.
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+///
+/// label0:
+/// %SGPR0 = S_OR_SAVEEXEC_B64 %SGPR0 // Restore the exec mask for the ELSE block
+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label1 // Use our branch optimization
+/// // instruction again.
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
+/// label1:
+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerControlFlowPass : public MachineFunctionPass {
+
+private:
+ static const unsigned SkipThreshold = 12;
+
+ static char ID;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
+
+ void If(MachineInstr &MI);
+ void Else(MachineInstr &MI);
+ void Break(MachineInstr &MI);
+ void IfBreak(MachineInstr &MI);
+ void ElseBreak(MachineInstr &MI);
+ void Loop(MachineInstr &MI);
+ void EndCf(MachineInstr &MI);
+
+ void Kill(MachineInstr &MI);
+ void Branch(MachineInstr &MI);
+
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
+public:
+ SILowerControlFlowPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI Lower control flow instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SILowerControlFlowPass::ID = 0;
+
+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
+ return new SILowerControlFlowPass(tm);
+}
+
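+// Estimate whether branching from From to To is worthwhile: returns true if
+// at least SkipThreshold instructions would otherwise execute with a fully
+// cleared EXEC mask, in which case a branch is assumed to be cheaper than
+// letting them run predicated off.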
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+
+ unsigned NumInstr = 0;
+
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
+ MBB = *MBB->succ_begin()) {
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+
+ if (I->isBundle() || !I->isBundled())
+ if (++NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
+ return;
+
+ DebugLoc DL = From.getDebugLoc();
+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addOperand(To)
+ .addReg(AMDGPU::EXEC);
+}
+
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
+void SILowerControlFlowPass::If(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+
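+ // S_AND_SAVEEXEC_B64: Reg = EXEC, EXEC = Vcc & EXEC. The XOR below then
+ // leaves the lanes that were live but not taken in Reg, which is exactly
+ // the saved mask a later SI_ELSE consumes.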
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
+ .addReg(Vcc);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Else(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ .addReg(Src); // Saved EXEC
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Break(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Vcc)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Saved = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Saved)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Src = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::EXEC);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+ MachineBasicBlock *Next = MI.getParent()->getNextNode();
+ MachineBasicBlock *Target = MI.getOperand(0).getMBB();
+ if (Target == Next)
+ MI.eraseFromParent();
+ else
+ assert(0 && "Branch target is not the fall-through block");
+}
+
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Kill is only allowed in pixel shaders
+ assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::PIXEL);
+
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits); this is also the loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
+
+ bool HaveKill = false;
+ bool NeedWQM = false;
+ unsigned Depth = 0;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_IF:
+ ++Depth;
+ If(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ Else(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ Break(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ IfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ ElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ ++Depth;
+ Loop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
+ EndCf(MI);
+ break;
+
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
+ case AMDGPU::S_BRANCH:
+ Branch(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
+
+ case AMDGPU::V_INTERP_P1_F32:
+ case AMDGPU::V_INTERP_P2_F32:
+ case AMDGPU::V_INTERP_MOV_F32:
+ NeedWQM = true;
+ break;
+
+ }
+ }
+ }
+
+ if (NeedWQM) {
+ MachineBasicBlock &MBB = MF.front();
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ }
+
+ return true;
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..ee0e30755f01
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF),
+ PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
new file mode 100644
index 000000000000..6da9f7f9a14d
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIMACHINEFUNCTIONINFO_H_
+#define SIMACHINEFUNCTIONINFO_H_
+
+#include "AMDGPUMachineFunction.h"
+
+namespace llvm {
+
+/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ unsigned PSInputAddr;
+};
+
+} // End namespace llvm
+
+
+#endif // SIMACHINEFUNCTIONINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
new file mode 100644
index 000000000000..99278ae8dceb
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -0,0 +1,53 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ return RC->getNumRegs();
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ return &AMDGPU::VReg_32RegClass;
+ default: return rc;
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ }
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
new file mode 100644
index 000000000000..caec22841345
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -0,0 +1,50 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the SI register class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass *RC) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
new file mode 100644
index 000000000000..4f14931a9c48
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -0,0 +1,182 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+// Special Registers
+def VCC : SIReg<"VCC", 106>;
+def EXEC : SIReg<"EXEC", 126>;
+def SCC : SIReg<"SCC", 253>;
+def M0 : SIReg <"M0", 124>;
+
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+
+// SGPR 32-bit registers
+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, 101))>;
+
+// SGPR 64-bit registers
+def SGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
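+
+// The trunc/shl/decimate combination forms even-aligned, non-overlapping
+// pairs: (SGPR0,SGPR1), (SGPR2,SGPR3), ..., (SGPR100,SGPR101).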
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
+
+// VGPR 32-bit registers
+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
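+
+// No decimate here: VGPR pairs overlap and need no alignment, giving
+// (VGPR0,VGPR1), (VGPR1,VGPR2), ..., (VGPR254,VGPR255).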
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
+
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add SGPR_32, M0Reg)
+>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg)
+>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
+
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
+
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+
+//===----------------------------------------------------------------------===//
+// [SV]Src_* register classes: these can hold either an immediate or a register
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
+
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
new file mode 100644
index 000000000000..28b65b825855
--- /dev/null
+++ b/lib/Target/R600/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a placeholder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 000000000000..46b1f18c6263
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// \brief The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// \brief Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeR600TargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
diff --git a/lib/Target/R600/TargetInfo/CMakeLists.txt b/lib/Target/R600/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..3d1584eba346
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600Info
+ AMDGPUTargetInfo.cpp
+ )
+
+add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/R600/TargetInfo/LLVMBuild.txt b/lib/Target/R600/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..4c6fea4aa08c
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Info
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
diff --git a/lib/Target/R600/TargetInfo/Makefile b/lib/Target/R600/TargetInfo/Makefile
new file mode 100644
index 000000000000..b8ac4e782302
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/R600/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 8165f5b8cc97..a9aab86abdac 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -262,22 +262,7 @@ unsigned countbits_slow(unsigned v) {
c += v & 1;
return c;
}
-unsigned countbits_fast(unsigned v){
- unsigned c;
- for (c = 0; v; c++)
- v &= v - 1; // clear the least significant bit set
- return c;
-}
-BITBOARD = unsigned long long
-int PopCnt(register BITBOARD a) {
- register int c=0;
- while(a) {
- c++;
- a &= a - 1;
- }
- return c;
-}
unsigned int popcount(unsigned int input) {
unsigned int count = 0;
for (unsigned int i = 0; i < 4 * 8; i++)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 7bf8c3f85eca..6123773d5f4b 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 9a729bd87044..1325b98cf0ee 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "fpmover"
#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumFpDs , "Number of instructions translated");
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index f5e10fc3a465..3d4bfdcd5e6d 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -21,8 +21,9 @@ void SparcELFMCAsmInfo::anchor() { }
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::sparcv9)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::sparcv9) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 25548625e760..e14b3cbf161d 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,15 +16,15 @@
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index d4712208126f..b38ac616dcf4 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -22,6 +22,14 @@ def RetCC_Sparc32 : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0, D1]>>
]>;
+// Sparc 64-bit C return-value convention.
+def RetCC_Sparc64 : CallingConv<[
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
// Sparc 32-bit C Calling convention.
def CC_Sparc32 : CallingConv<[
//Custom assign SRet to [sp+64].
@@ -34,3 +42,15 @@ def CC_Sparc32 : CallingConv<[
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
]>;
+
+// Sparc 64-bit C Calling convention.
+def CC_Sparc64 : CallingConv<[
+ // All integers are promoted to i64 by the caller.
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ // Integer arguments get passed in integer registers if there is space.
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // FIXME: Floating point arguments.
+
+ // Alternatively, they are assigned to the stack in 8-byte aligned units.
+ CCAssignToStack<8, 8>
+]>;
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 716c79f43a26..a0dae6e9480c 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -14,15 +14,15 @@
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -67,6 +67,22 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+void SparcFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+
void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 6b593c95bb10..464233e7da35 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -32,6 +32,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const { return false; }
};
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 93710c4e0b0f..5fa545d30160 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
-#include "llvm/Intrinsics.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 8e5619e6bc8d..325f13424b42 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -13,11 +13,8 @@
//===----------------------------------------------------------------------===//
#include "SparcISelLowering.h"
-#include "SparcTargetMachine.h"
#include "SparcMachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "SparcTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -25,6 +22,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -93,17 +93,13 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
DAG.getTarget(), RVLocs, *DAG.getContext());
// Analize return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (MF.getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ?
+ RetCC_Sparc64 : RetCC_Sparc32);
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ // Make room for the return address offset.
+ RetOps.push_back(SDValue());
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -115,6 +111,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
@@ -127,32 +124,47 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
- if (MF.getRegInfo().liveout_empty())
- MF.getRegInfo().addLiveOut(SP::I0);
+ RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
- SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32);
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode, Flag);
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
-/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+SDValue SparcTargetLowering::
+LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget->is64Bit())
+ return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+ return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+}
+
+/// LowerFormalArguments_32 - V8 uses a very simple ABI, where all values are
/// passed in either one or two GPRs, including FP values. TODO: we should
/// pass FP values in FP registers for fastcc functions.
-SDValue
-SparcTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
+SDValue SparcTargetLowering::
+LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -344,6 +356,63 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
return Chain;
}
+// Lower formal arguments for the 64-bit ABI.
+SDValue SparcTargetLowering::
+LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze arguments according to CC_Sparc64.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // This argument is passed in a register.
+ // All integer register arguments are promoted by the caller to i64.
+
+ // Create a virtual register for the promoted live-in value.
+ unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+ getRegClassFor(VA.getLocVT()));
+ SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+ // The caller promoted the argument, so insert an Assert?ext SDNode so we
+ // won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the argument type.
+ if (VA.isExtInLoc())
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ // The registers are exhausted. This argument was passed on the stack.
+ assert(VA.isMemLoc());
+ }
+ return Chain;
+}
+
SDValue
SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -692,11 +761,14 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ Subtarget = &TM.getSubtarget<SparcSubtarget>();
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
// Turn FP extload into load/fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -752,6 +824,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ }
+
// FIXME: There are instructions available for ATOMIC_FENCE
// on SparcV8 and later.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
@@ -759,10 +836,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
@@ -819,8 +898,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRXCC: return "SPISD::BRXCC";
case SPISD::BRFCC: return "SPISD::BRFCC";
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
@@ -847,6 +928,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
+ case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
@@ -867,7 +949,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
if (isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
- ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ (((LHS.getOpcode() == SPISD::SELECT_ICC ||
+ LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
@@ -955,14 +1038,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// Get the condition flag.
SDValue CompareFlag;
- if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(MVT::i32);
- VTs.push_back(MVT::Glue);
+ if (LHS.getValueType().isInteger()) {
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
- Opc = SPISD::BRICC;
+ // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
+ Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
@@ -986,13 +1068,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
LookThroughSetCC(LHS, RHS, CC, SPCC);
SDValue CompareFlag;
- if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(LHS.getValueType()); // subcc returns a value
- VTs.push_back(MVT::Glue);
+ if (LHS.getValueType().isInteger()) {
+ // subcc returns a value
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
- Opc = SPISD::SELECT_ICC;
+ Opc = LHS.getValueType() == MVT::i32 ?
+ SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 09148ea54027..aa2ef711a080 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -19,14 +19,18 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+ class SparcSubtarget;
+
namespace SPISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CMPICC, // Compare two GPR operands, set icc.
+ CMPICC, // Compare two GPR operands, set icc+xcc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
+ BRXCC, // Branch to dest on xcc condition (64-bit only).
BRFCC, // Branch to dest on fcc condition
SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
Hi, Lo, // Hi/Lo operations, typically on a global address.
@@ -42,6 +46,7 @@ namespace llvm {
}
class SparcTargetLowering : public TargetLowering {
+ const SparcSubtarget *Subtarget;
public:
SparcTargetLowering(TargetMachine &TM);
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -74,6 +79,18 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
new file mode 100644
index 000000000000..ca1153b3fe8f
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -0,0 +1,285 @@
+//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction definitions and patterns needed for 64-bit
+// code generation on SPARC v9.
+//
+// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can
+// also be used in 32-bit code running on a SPARC v9 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are don't care.
+// This gives us free trunc and anyext.
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>;
+def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>;
+
+} // Predicates = [Is64Bit]
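+
+// For example (a sketch, not part of this patch): with the patterns above,
+// (i64 (anyext i32:$val)) selects to a bare register-class copy, so the
+// extension generates no instructions at all.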
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Shift Instructions.
+//===----------------------------------------------------------------------===//
+//
+// The 32-bit shift instructions are still available. The left shift sll
+// instruction shifts all 64 bits, but it only accepts a 5-bit shift amount.
+//
+// The srl instruction only shifts the low 32 bits and clears the high 32 bits.
+// Finally, sra shifts the low 32 bits and sign-extends to 64 bits.
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+
+} // Predicates = [Is64Bit]
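+
+// Illustrative selections implied by the patterns above (a sketch):
+//   (i64 (zext i32:$v))   ->  srl $v, 0, $rd    ; clears bits 63-32
+//   (i64 (sext i32:$v))   ->  sra $v, 0, $rd    ; replicates bit 31
+//   (shl i64:$a, i64:$b)  ->  sllx $a, $b, $rd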
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Immediates.
+//===----------------------------------------------------------------------===//
+//
+// All 32-bit immediates can be materialized with sethi+or, but 64-bit
+// immediates may require more code. There may be a point where it is
+// preferable to use a constant pool load instead, depending on the
+// microarchitecture.
+
+// The %g0 register is constant 0.
+// This is useful for stx %g0, [...], for example.
+def : Pat<(i64 0), (i64 G0)>, Requires<[Is64Bit]>;
+
+// Single-instruction patterns.
+
+// The ALU instructions want their simm13 operands as i32 immediates.
+def as_i32imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+}]>;
+def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
+def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
+
+// Double-instruction patterns.
+
+// All unsigned i32 immediates can be handled by sethi+or.
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>,
+ Requires<[Is64Bit]>;
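+// E.g. (a sketch): i64 0x12345678 does not fit in simm13 and has nonzero low
+// bits, so it selects to the two-instruction sequence
+//   sethi %hi(0x12345678), $rd
+//   or    $rd, %lo(0x12345678), $rd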
+
+// All negative i33 immediates can be handled by sethi+xor.
+def nimm33 : PatLeaf<(imm), [{
+ int64_t Imm = N->getSExtValue();
+ return Imm < 0 && isInt<33>(Imm);
+}]>;
+// Bits 10-31 inverted. Same as assembler's %hix.
+def HIX22 : SDNodeXForm<imm, [{
+ uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
+def LOX10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+}]>;
+def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
+ Requires<[Is64Bit]>;
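+// E.g. (a sketch): i64 -0x12345678 selects to
+//   sethi %hix(-0x12345678), $rd
+//   xor   $rd, %lox(-0x12345678), $rd
+// where the xor's sign-extended simm13 also sets bits 32-63 to ones.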
+
+// More possible patterns:
+//
+// (sllx sethi, n)
+// (sllx simm13, n)
+//
+// 3 instrs:
+//
+// (xor (sllx sethi), simm13)
+// (sllx (xor sethi, simm13))
+//
+// 4 instrs:
+//
+// (or sethi, (sllx sethi))
+// (xnor sethi, (sllx sethi))
+//
+// 5 instrs:
+//
+// (or (sllx sethi), (or sethi, simm13))
+// (xnor (sllx sethi), (or sethi, simm13))
+// (or (sllx sethi), (sllx sethi))
+// (xnor (sllx sethi), (sllx sethi))
+//
+// Worst case is 6 instrs:
+//
+// (or (sllx (or sethi, simm13)), (or sethi, simm13))
+
+// Bits 42-63, same as assembler's %hh.
+def HH22 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 32-41, same as assembler's %hm.
+def HM10 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+def : Pat<(i64 imm:$val),
+ (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)),
+ (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
+ Requires<[Is64Bit]>;
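+
+// For a general immediate, the pattern above emits the worst-case
+// 6-instruction sequence (a sketch; $t and $u stand for scratch registers):
+//   sethi %hh(imm), $t          ; bits 42-63
+//   or    $t, %hm(imm), $t      ; bits 32-41
+//   sllx  $t, 32, $t
+//   sethi %hi(imm), $u          ; bits 10-31
+//   or    $u, %lo(imm), $u      ; bits 0-9
+//   or    $t, $u, $rd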
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Arithmetic and Logic.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+// Register-register instructions.
+
+def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
+def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>;
+def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+
+def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
+def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>;
+def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+
+def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
+def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
+
+// Add/sub with carry were renamed to addc/subc in SPARC v9.
+def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
+def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
+
+def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
+def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+def : Pat<(SPcmpicc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+// Register-immediate instructions.
+
+def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
+def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>;
+def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
+
+def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
+def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
+
+def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Loads and Stores.
+//===----------------------------------------------------------------------===//
+//
+// All the 32-bit loads and stores are available. The extending loads sign-
+// or zero-extend to 64 bits. The LDrr and LDri instructions load 32 bits
+// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned
+// Word).
+//
+// SPARC v9 adds 64-bit loads as well as a sign-extending ldsw i32 load.
+
+let Predicates = [Is64Bit] in {
+
+// 64-bit loads.
+def LDXrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRrr:$addr))]>;
+def LDXri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRri:$addr))]>;
+
+// Extending loads to i64.
+def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+
+// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
+def LDSWrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
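+
+// E.g. (a sketch): (i64 (sextloadi32 ADDRri:$addr)) now selects to a single
+//   ldsw [$addr], $dst
+// instead of an ld followed by sra $dst, 0, $dst.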
+
+// 64-bit stores.
+def STXrr : F3_1<3, 0b001110,
+ (outs), (ins MEMrr:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRrr:$addr)]>;
+def STXri : F3_2<3, 0b001110,
+ (outs), (ins MEMri:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8 i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri ADDRri:$addr, $src)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc flags are for
+// the full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
+
+let Predicates = [Is64Bit] in {
+
+let Uses = [ICC] in
+def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "bp$cc %xcc, $dst",
+ [(SPbrxcc bb:$dst, imm:$cc)]>;
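+
+// E.g. (a sketch): a signed 64-bit "a < b" branch becomes
+//   subcc $a, $b, $tmp    ; sets both icc and xcc; result unused
+//   bpl %xcc, $dst        ; the "l" condition tested on the 64-bit flags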
+
+// Conditional moves on %xcc.
+let Uses = [ICC], Constraints = "$f = $rd" in {
+def MOVXCCrr : Pseudo<(outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $rs2, $rd",
+ [(set i32:$rd,
+ (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>;
+def MOVXCCri : Pseudo<(outs IntRegs:$rd),
+ (ins i32imm:$i, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $i, $rd",
+ [(set i32:$rd,
+                      (SPselectxcc simm11:$i, i32:$f, imm:$cond))]>;
+} // Uses, Constraints
+
+def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
+ (MOVXCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
+ (MOVXCCri (as_i32imm $t), $f, imm:$cond)>;
+
+} // Predicates = [Is64Bit]
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index dce331228b8f..f1018569153c 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -111,4 +111,41 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
let Inst{4-0} = rs2;
}
+// Shift by register rs2.
+class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12} = x; // extended registers.
+ let Inst{4-0} = rs2;
+}
+// Shift by immediate.
+class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<6> shcnt; // shcnt32 / shcnt64.
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12} = x; // extended registers.
+ let Inst{5-0} = shcnt;
+}
+
+// Define rr and ri shift instructions with patterns.
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
+ ValueType VT, RegisterClass RC> {
+ def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2),
+ !strconcat(OpcStr, " $rs, $rs2, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>;
+ def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt),
+ !strconcat(OpcStr, " $rs, $shcnt, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>;
+}
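+
+// Usage (from SparcInstr64Bit.td in this same patch):
+//   defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+// expands to SLLXrr and SLLXri, with the shift patterns filled in from the
+// OpNode, VT, and RC parameters.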
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index f8674d0bd660..39d7329f2663 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -15,12 +15,12 @@
#include "Sparc.h"
#include "SparcMachineFunctionInfo.h"
#include "SparcSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_INSTRINFO_CTOR
#include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index e64c140e4921..5ff439583c5c 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -21,6 +21,12 @@ include "SparcInstrFormats.td"
// Feature predicates.
//===----------------------------------------------------------------------===//
+// True when generating 32-bit code.
+def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+
+// True when generating 64-bit code. This also implies HasV9.
+def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+
// HasV9 - This predicate is true when the target processor supports V9
// instructions. Note that the machine may be running in 32-bit mode.
def HasV9 : Predicate<"Subtarget.isV9()">;
@@ -63,17 +69,17 @@ def SETHIimm : PatLeaf<(imm), [{
}], HI22>;
// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
// Address operands
-def MEMrr : Operand<i32> {
+def MEMrr : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
}
-def MEMri : Operand<i32> {
+def MEMri : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops IntRegs, i32imm);
+ let MIOperandInfo = (ops ptr_rc, i32imm);
}
// Branch targets have OtherVT type.
@@ -98,6 +104,7 @@ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
@@ -107,6 +114,7 @@ def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
// These are target-independent nodes, but have target-specific formats.
@@ -126,7 +134,7 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
@@ -182,11 +190,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
def rr : F3_1<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
def ri : F3_2<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
}
/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -243,10 +251,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
"!FpMOVD $src, $dst", []>;
def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpNEGD $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpABSD $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -257,19 +265,16 @@ let Uses = [ICC], usesCustomInserter = 1 in {
def SELECT_CC_Int_ICC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
- [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_ICC PSEUDO!",
- [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
- [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
}
let usesCustomInserter = 1, Uses = [FCC] in {
@@ -277,19 +282,16 @@ let usesCustomInserter = 1, Uses = [FCC] in {
def SELECT_CC_Int_FCC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_FCC PSEUDO!",
- [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_FCC PSEUDO!",
- [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
- [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
}
@@ -309,111 +311,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
def LDSBrr : F3_1<3, 0b001001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
def LDSBri : F3_2<3, 0b001001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
def LDSHrr : F3_1<3, 0b001010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
def LDSHri : F3_2<3, 0b001010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
def LDUBrr : F3_1<3, 0b000001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
def LDUBri : F3_2<3, 0b000001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
def LDUHrr : F3_1<3, 0b000010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
def LDUHri : F3_2<3, 0b000010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
def LDrr : F3_1<3, 0b000000,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set i32:$dst, (load ADDRrr:$addr))]>;
def LDri : F3_2<3, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+ [(set i32:$dst, (load ADDRri:$addr))]>;
// Section B.2 - Load Floating-point Instructions, p. 92
def LDFrr : F3_1<3, 0b100000,
(outs FPRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f32:$dst, (load ADDRrr:$addr))]>;
def LDFri : F3_2<3, 0b100000,
(outs FPRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f32:$dst, (load ADDRri:$addr))]>;
def LDDFrr : F3_1<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMrr:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f64:$dst, (load ADDRrr:$addr))]>;
def LDDFri : F3_2<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMri:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f64:$dst, (load ADDRri:$addr))]>;
// Section B.4 - Store Integer Instructions, p. 95
def STBrr : F3_1<3, 0b000101,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
def STBri : F3_2<3, 0b000101,
(outs), (ins MEMri:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRri:$addr)]>;
def STHrr : F3_1<3, 0b000110,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
def STHri : F3_2<3, 0b000110,
(outs), (ins MEMri:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRri:$addr)]>;
def STrr : F3_1<3, 0b000100,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRrr:$addr)]>;
+ [(store i32:$src, ADDRrr:$addr)]>;
def STri : F3_2<3, 0b000100,
(outs), (ins MEMri:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRri:$addr)]>;
+ [(store i32:$src, ADDRri:$addr)]>;
// Section B.5 - Store Floating-point Instructions, p. 97
def STFrr : F3_1<3, 0b100100,
(outs), (ins MEMrr:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRrr:$addr)]>;
+ [(store f32:$src, ADDRrr:$addr)]>;
def STFri : F3_2<3, 0b100100,
(outs), (ins MEMri:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRri:$addr)]>;
+ [(store f32:$src, ADDRri:$addr)]>;
def STDFrr : F3_1<3, 0b100111,
(outs), (ins MEMrr:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRrr:$addr)]>;
+ [(store f64:$src, ADDRrr:$addr)]>;
def STDFri : F3_2<3, 0b100111,
(outs), (ins MEMri:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRri:$addr)]>;
+ [(store f64:$src, ADDRri:$addr)]>;
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
(outs IntRegs:$dst), (ins i32imm:$src),
"sethi $src, $dst",
- [(set IntRegs:$dst, SETHIimm:$src)]>;
+ [(set i32:$dst, SETHIimm:$src)]>;
// Section B.10 - NOP Instruction, p. 105
// (It's a special case of SETHI)
@@ -426,7 +428,7 @@ defm AND : F3_12<"and", 0b000001, and>;
def ANDNrr : F3_1<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"andn $b, $c, $dst",
- [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
def ANDNri : F3_2<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"andn $b, $c, $dst", []>;
@@ -436,7 +438,7 @@ defm OR : F3_12<"or", 0b000010, or>;
def ORNrr : F3_1<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"orn $b, $c, $dst",
- [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
def ORNri : F3_2<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"orn $b, $c, $dst", []>;
@@ -445,7 +447,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>;
def XNORrr : F3_1<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"xnor $b, $c, $dst",
- [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+ [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"xnor $b, $c, $dst", []>;
@@ -462,7 +464,7 @@ defm ADD : F3_12<"add", 0b000000, add>;
def LEA_ADDri : F3_2<2, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"add ${addr:arith}, $dst",
- [(set IntRegs:$dst, ADDRri:$addr)]>;
+ [(set i32:$dst, ADDRri:$addr)]>;
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
@@ -603,11 +605,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010,
def FSTOD : F3_3<2, 0b110100, 0b011001001,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fstod $src, $dst",
- [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+ [(set f64:$dst, (fextend f32:$src))]>;
def FDTOS : F3_3<2, 0b110100, 0b011000110,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtos $src, $dst",
- [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+ [(set f32:$dst, (fround f64:$src))]>;
// Floating-point Move Instructions, p. 144
def FMOVS : F3_3<2, 0b110100, 0b000000001,
@@ -616,22 +618,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001,
def FNEGS : F3_3<2, 0b110100, 0b000000101,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fnegs $src, $dst",
- [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+ [(set f32:$dst, (fneg f32:$src))]>;
def FABSS : F3_3<2, 0b110100, 0b000001001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fabss $src, $dst",
- [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+ [(set f32:$dst, (fabs f32:$src))]>;
// Floating-point Square Root Instructions, p.145
def FSQRTS : F3_3<2, 0b110100, 0b000101001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fsqrts $src, $dst",
- [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+ [(set f32:$dst, (fsqrt f32:$src))]>;
def FSQRTD : F3_3<2, 0b110100, 0b000101010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fsqrtd $src, $dst",
- [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+ [(set f64:$dst, (fsqrt f64:$src))]>;
@@ -639,42 +641,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010,
def FADDS : F3_3<2, 0b110100, 0b001000001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fadds $src1, $src2, $dst",
- [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
def FADDD : F3_3<2, 0b110100, 0b001000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"faddd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
def FSUBS : F3_3<2, 0b110100, 0b001000101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsubs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
def FSUBD : F3_3<2, 0b110100, 0b001000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fsubd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
// Floating-point Multiply and Divide Instructions, p. 147
def FMULS : F3_3<2, 0b110100, 0b001001001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fmuls $src1, $src2, $dst",
- [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
- (fextend FPRegs:$src2)))]>;
+ [(set f64:$dst, (fmul (fextend f32:$src1),
+ (fextend f32:$src2)))]>;
def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fdivs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
def FDIVD : F3_3<2, 0b110100, 0b001001110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fdivd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
// Floating-point Compare Instructions, p. 148
// Note: the 2nd template arg is different for these guys.
@@ -685,11 +687,11 @@ let Defs = [FCC] in {
def FCMPS : F3_3<2, 0b110101, 0b001010001,
(outs), (ins FPRegs:$src1, FPRegs:$src2),
"fcmps $src1, $src2\n\tnop",
- [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ [(SPcmpfcc f32:$src1, f32:$src2)]>;
def FCMPD : F3_3<2, 0b110101, 0b001010010,
(outs), (ins DFPRegs:$src1, DFPRegs:$src2),
"fcmpd $src1, $src2\n\tnop",
- [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+ [(SPcmpfcc f64:$src1, f64:$src2)]>;
}
//===----------------------------------------------------------------------===//
@@ -704,52 +706,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in {
def MOVICCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>;
def MOVICCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def MOVFCCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>;
def MOVFCCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [ICC] in {
def FMOVS_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %icc, $F, $dst",
- [(set FPRegs:$dst,
- (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst,
+ (SPselecticc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %icc, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def FMOVS_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %fcc0, $F, $dst",
- [(set FPRegs:$dst,
- (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %fcc0, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>;
}
}
@@ -762,11 +757,11 @@ let Predicates = [HasV9] in {
def FNEGD : F3_3<2, 0b110100, 0b000000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fnegd $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FABSD : F3_3<2, 0b110100, 0b000001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fabsd $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
@@ -774,8 +769,8 @@ let Predicates = [HasV9] in {
def POPCrr : F3_1<2, 0b101110,
(outs IntRegs:$dst), (ins IntRegs:$src),
"popc $src, $dst", []>, Requires<[HasV9]>;
-def : Pat<(ctpop IntRegs:$src),
- (POPCrr (SLLri IntRegs:$src, 0))>;
+def : Pat<(ctpop i32:$src),
+ (POPCrr (SLLri $src, 0))>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -783,28 +778,28 @@ def : Pat<(ctpop IntRegs:$src),
// Small immediates.
def : Pat<(i32 simm13:$val),
- (ORri G0, imm:$val)>;
+ (ORri (i32 G0), imm:$val)>;
// Arbitrary immediates.
def : Pat<(i32 imm:$val),
(ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
// subc
-def : Pat<(subc IntRegs:$b, IntRegs:$c),
- (SUBCCrr IntRegs:$b, IntRegs:$c)>;
-def : Pat<(subc IntRegs:$b, simm13:$val),
- (SUBCCri IntRegs:$b, imm:$val)>;
+def : Pat<(subc i32:$b, i32:$c),
+ (SUBCCrr $b, $c)>;
+def : Pat<(subc i32:$b, simm13:$val),
+ (SUBCCri $b, imm:$val)>;
// Global addresses, constant pool entries
def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
-def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
-def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
// Add reg, lo. This is used when taking the addr of a global/constpool entry.
-def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
- (ADDri IntRegs:$r, tglobaladdr:$in)>;
-def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
- (ADDri IntRegs:$r, tconstpool:$in)>;
+def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
+ (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
+ (ADDri $r, tconstpool:$in)>;
// Calls:
def : Pat<(call tglobaladdr:$dst),
@@ -823,3 +818,5 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
// zextload bool -> zextload byte
def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+include "SparcInstr64Bit.td"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index ff8d3c533f3d..db9b30eb4330 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -14,14 +14,14 @@
#include "SparcRegisterInfo.h"
#include "Sparc.h"
#include "SparcSubtarget.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_REGINFO_TARGET_DESC
#include "SparcGenRegisterInfo.inc"
@@ -56,45 +56,33 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void SparcRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- DebugLoc dl = MI.getDebugLoc();
- int Size = MI.getOperand(0).getImm();
- if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
- Size = -Size;
- if (Size)
- BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
- MBB.erase(I);
+const TargetRegisterClass*
+SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Replace frame index with a frame pointer reference.
if (Offset >= -4096 && Offset <= 4095) {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(SP::I6, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
} else {
// Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
// scavenge a register here instead of reserving G1 all of the time.
@@ -104,8 +92,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
.addReg(SP::I6);
// Insert: G1+%lo(offset) into the user.
- MI.getOperand(i).ChangeToRegister(SP::G1, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9515ad33dcc2..f91df5398953 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -36,14 +36,15 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 81bff6c51c9d..497e7c5d5612 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -43,7 +43,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
}
// Control Registers
-def ICC : SparcCtrlReg<"ICC">;
+def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
def FCC : SparcCtrlReg<"FCC">;
// Y register
@@ -140,7 +140,10 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"SP", [i32], 32,
+// This register class should not be used to hold i64 values, use the I64Regs
+// register class for that. The i64 type is included here to allow i64 patterns
+// using the integer instructions.
+def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(add L0, L1, L2, L3, L4, L5, L6,
L7, I0, I1, I2, I3, I4, I5,
O0, O1, O2, O3, O4, O5, O7,
@@ -155,6 +158,13 @@ def IntRegs : RegisterClass<"SP", [i32], 32,
G5, G6, G7 // reserved for kernel
)>;
+// Register class for 64-bit mode, with a 64-bit spill slot size.
+// These are the same as the 32-bit registers, so TableGen will consider this
+// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+// spill slot is a stricter constraint than only requiring a 32-bit spill slot.
+def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
+
+// Floating point register classes.
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 45c962471dda..60bceb708fbc 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -12,8 +12,8 @@
#include "SparcTargetMachine.h"
#include "Sparc.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 0fbe2d7cda36..081075de2dc8 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,15 +14,14 @@
#ifndef SPARCTARGETMACHINE_H
#define SPARCTARGETMACHINE_H
-#include "SparcInstrInfo.h"
-#include "SparcISelLowering.h"
#include "SparcFrameLowering.h"
+#include "SparcISelLowering.h"
+#include "SparcInstrInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -55,12 +52,6 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index c9d5b7bdfb3d..bb714632349a 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Sparc.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 393178a4692e..9a78ebc3facb 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,11 +14,11 @@
#include "llvm-c/Target.h"
#include "llvm-c/Initialization.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/LLVMContext.h"
#include <cstring>
using namespace llvm;
@@ -26,7 +26,6 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
- initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
deleted file mode 100644
index f1d1d07c38ae..000000000000
--- a/lib/Target/TargetInstrInfo.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cctype>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TargetInstrInfo
-//
-// Methods that depend on CodeGen are implemented in
-// TargetInstrInfoImpl.cpp. Invoking them without linking libCodeGen raises a
-// link error.
-// ===----------------------------------------------------------------------===//
-
-TargetInstrInfo::~TargetInstrInfo() {
-}
-
-const TargetRegisterClass*
-TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const {
- if (OpNum >= MCID.getNumOperands())
- return 0;
-
- short RegClass = MCID.OpInfo[OpNum].RegClass;
- if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
- return TRI->getPointerRegClass(MF, RegClass);
-
- // Instructions like INSERT_SUBREG do not have fixed register classes.
- if (RegClass < 0)
- return 0;
-
- // Otherwise just look it up normally.
- return TRI->getRegClass(RegClass);
-}
-
-/// insertNoop - Insert a noop into the instruction stream at the specified
-/// point.
-void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- llvm_unreachable("Target didn't implement insertNoop!");
-}
-
-/// Measure the specified inline asm to determine an approximation of its
-/// length.
-/// Comments (which run till the next SeparatorString or newline) do not
-/// count as an instruction.
-/// Any other non-whitespace text is considered an instruction, with
-/// multiple instructions separated by SeparatorString or newlines.
-/// Variable-length instructions are not handled here; this function
-/// may be overloaded in the target code to do that.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
-
-
- // Count the number of instructions in the asm.
- bool atInsnStart = true;
- unsigned Length = 0;
- for (; *Str; ++Str) {
- if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
- strlen(MAI.getSeparatorString())) == 0)
- atInsnStart = true;
- if (atInsnStart && !std::isspace(*Str)) {
- Length += MAI.getMaxInstLength();
- atInsnStart = false;
- }
- if (atInsnStart && strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0)
- atInsnStart = false;
- }
-
- return Length;
-}
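The deleted getInlineAsmLength above charges MaxInstLength bytes per non-comment statement. A standalone sketch of the same scheme, with fixed stand-ins for the MCAsmInfo queries (';' separates statements, '#' starts a comment, 4-byte maximum instruction length) and the comment check hoisted above the counting step so comments are genuinely skipped:

#include <cctype>
#include <cstring>

unsigned countInlineAsmLength(const char *Str) {
  const char *Sep = ";", *Comment = "#";
  const unsigned MaxInstLength = 4;

  bool atInsnStart = true;
  unsigned Length = 0;
  for (; *Str; ++Str) {
    if (*Str == '\n' ||
        std::strncmp(Str, Sep, std::strlen(Sep)) == 0)
      atInsnStart = true;                  // a new statement may follow
    if (atInsnStart &&
        std::strncmp(Str, Comment, std::strlen(Comment)) == 0)
      atInsnStart = false;                 // comment runs to '\n' or Sep
    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
      Length += MaxInstLength;             // charge a full instruction
      atInsnStart = false;
    }
  }
  return Length;
}

For example, countInlineAsmLength("mov r0, r1\nadd r2, r3") returns 8: two statements, each charged the 4-byte maximum.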
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index e049a1d3b62f..64bd56f6e7df 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Function.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
TargetIntrinsicInfo::TargetIntrinsicInfo() {
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 6d4eab12045c..ee88ce77c09f 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,6 +24,8 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "_IO_getc",
+ "_IO_putc",
"_ZdaPv",
"_ZdlPv",
"_Znaj",
@@ -38,7 +40,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__cxa_guard_abort",
"__cxa_guard_acquire",
"__cxa_guard_release",
+ "__isoc99_scanf",
+ "__isoc99_sscanf",
"__memcpy_chk",
+ "__strdup",
+ "__strndup",
+ "__strtok_r",
+ "abs",
+ "access",
"acos",
"acosf",
"acosh",
@@ -60,6 +69,13 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"atanhf",
"atanhl",
"atanl",
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "bcmp",
+ "bcopy",
+ "bzero",
"calloc",
"cbrt",
"cbrtf",
@@ -67,6 +83,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"ceil",
"ceilf",
"ceill",
+ "chmod",
+ "chown",
+ "clearerr",
+ "closedir",
"copysign",
"copysignf",
"copysignl",
@@ -76,6 +96,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"coshf",
"coshl",
"cosl",
+ "ctermid",
"exp",
"exp10",
"exp10f",
@@ -91,18 +112,67 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fabs",
"fabsf",
"fabsl",
+ "fclose",
+ "fdopen",
+ "feof",
+ "ferror",
+ "fflush",
+ "ffs",
+ "ffsl",
+ "ffsll",
+ "fgetc",
+ "fgetpos",
+ "fgets",
+ "fileno",
"fiprintf",
+ "flockfile",
"floor",
"floorf",
"floorl",
"fmod",
"fmodf",
"fmodl",
+ "fopen",
+ "fopen64",
+ "fprintf",
"fputc",
"fputs",
+ "fread",
"free",
+ "frexp",
+ "frexpf",
+ "frexpl",
+ "fscanf",
+ "fseek",
+ "fseeko",
+ "fseeko64",
+ "fsetpos",
+ "fstat",
+ "fstat64",
+ "fstatvfs",
+ "fstatvfs64",
+ "ftell",
+ "ftello",
+ "ftello64",
+ "ftrylockfile",
+ "funlockfile",
"fwrite",
+ "getc",
+ "getc_unlocked",
+ "getchar",
+ "getenv",
+ "getitimer",
+ "getlogin_r",
+ "getpwnam",
+ "gets",
+ "htonl",
+ "htons",
"iprintf",
+ "isascii",
+ "isdigit",
+ "labs",
+ "lchown",
+ "llabs",
"log",
"log10",
"log10f",
@@ -118,30 +188,64 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"logbl",
"logf",
"logl",
+ "lstat",
+ "lstat64",
"malloc",
+ "memalign",
+ "memccpy",
"memchr",
"memcmp",
"memcpy",
"memmove",
+ "memrchr",
"memset",
"memset_pattern16",
+ "mkdir",
+ "mktime",
+ "modf",
+ "modff",
+ "modfl",
"nearbyint",
"nearbyintf",
"nearbyintl",
+ "ntohl",
+ "ntohs",
+ "open",
+ "open64",
+ "opendir",
+ "pclose",
+ "perror",
+ "popen",
"posix_memalign",
"pow",
"powf",
"powl",
+ "pread",
+ "printf",
+ "putc",
"putchar",
"puts",
+ "pwrite",
+ "qsort",
+ "read",
+ "readlink",
"realloc",
"reallocf",
+ "realpath",
+ "remove",
+ "rename",
+ "rewind",
"rint",
"rintf",
"rintl",
+ "rmdir",
"round",
"roundf",
"roundl",
+ "scanf",
+ "setbuf",
+ "setitimer",
+ "setvbuf",
"sin",
"sinf",
"sinh",
@@ -149,17 +253,28 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"sinhl",
"sinl",
"siprintf",
+ "snprintf",
+ "sprintf",
"sqrt",
"sqrtf",
"sqrtl",
+ "sscanf",
+ "stat",
+ "stat64",
+ "statvfs",
+ "statvfs64",
"stpcpy",
+ "stpncpy",
+ "strcasecmp",
"strcat",
"strchr",
"strcmp",
+ "strcoll",
"strcpy",
"strcspn",
"strdup",
"strlen",
+ "strncasecmp",
"strncat",
"strncmp",
"strncpy",
@@ -171,21 +286,43 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"strstr",
"strtod",
"strtof",
+ "strtok",
+ "strtok_r",
"strtol",
"strtold",
"strtoll",
"strtoul",
"strtoull",
+ "strxfrm",
+ "system",
"tan",
"tanf",
"tanh",
"tanhf",
"tanhl",
"tanl",
+ "times",
+ "tmpfile",
+ "tmpfile64",
+ "toascii",
"trunc",
"truncf",
"truncl",
- "valloc"
+ "uname",
+ "ungetc",
+ "unlink",
+ "unsetenv",
+ "utime",
+ "utimes",
+ "valloc",
+ "vfprintf",
+ "vfscanf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "write"
};
/// initialize - Initialize the set of available library functions based on the
@@ -247,7 +384,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::fabsl);
TLI.setUnavailable(LibFunc::floorl);
TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
TLI.setUnavailable(LibFunc::powl);
TLI.setUnavailable(LibFunc::sinl);
TLI.setUnavailable(LibFunc::sinhl);
@@ -324,9 +463,116 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::tanhf);
}
- // Win32 does *not* provide stpcpy. It is provided on POSIX systems:
- // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
+ // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+ // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
+
+ // The following functions are available on at least Linux:
+ if (T.getOS() != Triple::Linux) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
}
}
@@ -351,11 +597,40 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+namespace {
+struct StringComparator {
+ /// Compare two strings and return true if LHS is lexicographically less than
+ /// RHS. Requires that RHS doesn't contain any zero bytes.
+ bool operator()(const char *LHS, StringRef RHS) const {
+ // Compare prefixes with strncmp. If prefixes match we know that LHS is
+ // greater than or equal to RHS, as RHS can't contain any '\0'.
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ }
+
+ // Provided for compatibility with MSVC's debug mode.
+ bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
+ bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
+};
+}
+
bool TargetLibraryInfo::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
const char **Start = &StandardNames[0];
const char **End = &StandardNames[LibFunc::NumLibFuncs];
- const char **I = std::lower_bound(Start, End, funcName);
+
+ // Filter out empty names and names containing null bytes, those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return false;
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
+ if (funcName.front() == '\01')
+ funcName = funcName.substr(1);
+ const char **I = std::lower_bound(Start, End, funcName, StringComparator());
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
return true;
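The StringComparator above exists so std::lower_bound can compare the const char* table entries directly against a StringRef key without building temporaries. A self-contained sketch of the same pattern, with std::string standing in for StringRef and a toy four-entry table:

#include <algorithm>
#include <cstring>
#include <string>

// Sorted table of C strings, mirroring StandardNames.
static const char *const Names[] = { "abs", "malloc", "memcpy", "printf" };

struct CStrLess {
  // Element on the left, key on the right: compare only the key's
  // length. If the prefixes match, the table entry is greater than or
  // equal to the key because an entry cannot contain an embedded '\0'.
  bool operator()(const char *LHS, const std::string &RHS) const {
    return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
  }
};

// Return the index of Key in Names, or -1 if it is absent.
int indexOf(const std::string &Key) {
  const char *const *B = Names, *const *E = Names + 4;
  const char *const *I = std::lower_bound(B, E, Key, CStrLess());
  return (I != E && Key == *I) ? int(I - B) : -1;
}

A prefix such as "mem" lands on "memcpy" in the search but fails the final equality check, so only exact names resolve to a LibFunc index.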
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 9d7e2b825f41..f5121e34f77f 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -13,21 +13,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -285,35 +285,35 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
return DataSection;
}
-/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// getTTypeGlobalReference - Return an MCExpr to use for a
/// reference to the specified global variable from exception
/// handling information.
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCSymbol *Sym = Mang->getSymbol(GV);
- return getExprForDwarfReference(Sym, Encoding, Streamer);
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbolRefExpr *Ref =
+ MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+
+ return getTTypeReference(Ref, Encoding, Streamer);
}
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
-
+getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
switch (Encoding & 0x70) {
default:
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
// Do nothing special
- return Res;
+ return Sym;
case dwarf::DW_EH_PE_pcrel: {
// Emit a label to the streamer for the current position. This gives us
// .-foo addressing.
MCSymbol *PCSym = getContext().CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ return MCBinaryExpr::CreateSub(Sym, PC, getContext());
}
}
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 382571982b96..e7282519d597 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,12 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
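Hand-expanding the RESET_OPTION macro above for one option makes the control flow explicit; F and TO are the locals from resetTargetOptions (a sketch of the expansion, not verbatim preprocessor output):

// RESET_OPTION(UnsafeFPMath, "unsafe-fp-math") expands to roughly:
if (F->hasFnAttribute("unsafe-fp-math"))
  TO.UnsafeFPMath =
      (F->getAttributes()
           .getAttribute(AttributeSet::FunctionIndex, "unsafe-fp-math")
           .getValueAsString() == "true");

That is, each option is only overwritten when the function carries the corresponding string attribute, and only to true when the attribute's value is literally "true".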
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index f69c2abd50d2..79f74bd66127 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm-c/TargetMachine.h"
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
-#include "llvm-c/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
@@ -184,7 +184,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
}
if (TM->addPassesToEmitFile(pass, destf, ft)) {
- error = "No DataLayout in TargetMachine";
+ error = "TargetMachine can't emit a file of this type";
*ErrorMessage = strdup(error.c_str());
return true;
}
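A hedged usage sketch of the C API whose error message was corrected above; TM and Mod are assumed to be a valid LLVMTargetMachineRef and LLVMModuleRef, and "out.o" is an illustrative path:

#include "llvm-c/TargetMachine.h"
#include <cstdio>

// Emit Mod as an object file through TM; on failure the API now
// reports the real cause instead of the bogus DataLayout message.
bool emitObject(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
  char *Err = 0;
  if (LLVMTargetMachineEmitToFile(TM, Mod, const_cast<char *>("out.o"),
                                  LLVMObjectFile, &Err)) {
    std::fprintf(stderr, "emit failed: %s\n", Err);
    LLVMDisposeMessage(Err);
    return false;
  }
  return true;
}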
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
deleted file mode 100644
index be8b58289039..000000000000
--- a/lib/Target/TargetRegisterInfo.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetRegisterInfo interface.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
- regclass_iterator RCB, regclass_iterator RCE,
- const char *const *SRINames,
- const unsigned *SRILaneMasks)
- : InfoDesc(ID), SubRegIndexNames(SRINames),
- SubRegIndexLaneMasks(SRILaneMasks),
- RegClassBegin(RCB), RegClassEnd(RCE) {
-}
-
-TargetRegisterInfo::~TargetRegisterInfo() {}
-
-void PrintReg::print(raw_ostream &OS) const {
- if (!Reg)
- OS << "%noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
- else if (TRI && Reg < TRI->getNumRegs())
- OS << '%' << TRI->getName(Reg);
- else
- OS << "%physreg" << Reg;
- if (SubIdx) {
- if (TRI)
- OS << ':' << TRI->getSubRegIndexName(SubIdx);
- else
- OS << ":sub(" << SubIdx << ')';
- }
-}
-
-void PrintRegUnit::print(raw_ostream &OS) const {
- // Generic printout when TRI is missing.
- if (!TRI) {
- OS << "Unit~" << Unit;
- return;
- }
-
- // Check for invalid register units.
- if (Unit >= TRI->getNumRegUnits()) {
- OS << "BadUnit~" << Unit;
- return;
- }
-
- // Normal units have at least one root.
- MCRegUnitRootIterator Roots(Unit, TRI);
- assert(Roots.isValid() && "Unit has no roots.");
- OS << TRI->getName(*Roots);
- for (++Roots; Roots.isValid(); ++Roots)
- OS << '~' << TRI->getName(*Roots);
-}
-
-/// getAllocatableClass - Return the maximal subclass of the given register
-/// class that is allocatable, or NULL.
-const TargetRegisterClass *
-TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
- if (!RC || RC->isAllocatable())
- return RC;
-
- const unsigned *SubClass = RC->getSubClassMask();
- for (unsigned Base = 0, BaseE = getNumRegClasses();
- Base < BaseE; Base += 32) {
- unsigned Idx = Base;
- for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
- unsigned Offset = CountTrailingZeros_32(Mask);
- const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
- if (SubRC->isAllocatable())
- return SubRC;
- Mask >>= Offset;
- Idx += Offset + 1;
- }
- }
- return NULL;
-}
-
-/// getMinimalPhysRegClass - Returns the Register Class of a physical
-/// register of the given type, picking the most sub register class of
-/// the right type that contains this physreg.
-const TargetRegisterClass *
-TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
-
- // Pick the most sub register class of the right type that contains
- // this physreg.
- const TargetRegisterClass* BestRC = 0;
- for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
- const TargetRegisterClass* RC = *I;
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSubClass(RC)))
- BestRC = RC;
- }
-
- assert(BestRC && "Couldn't find the register class");
- return BestRC;
-}
-
-/// getAllocatableSetForRC - Toggle the bits that represent allocatable
-/// registers for the specific register class.
-static void getAllocatableSetForRC(const MachineFunction &MF,
- const TargetRegisterClass *RC, BitVector &R){
- assert(RC->isAllocatable() && "invalid for nonallocatable sets");
- ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
- for (unsigned i = 0; i != Order.size(); ++i)
- R.set(Order[i]);
-}
-
-BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
- const TargetRegisterClass *RC) const {
- BitVector Allocatable(getNumRegs());
- if (RC) {
- // A register class with no allocatable subclass returns an empty set.
- const TargetRegisterClass *SubClass = getAllocatableClass(RC);
- if (SubClass)
- getAllocatableSetForRC(MF, SubClass, Allocatable);
- } else {
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- if ((*I)->isAllocatable())
- getAllocatableSetForRC(MF, *I, Allocatable);
- }
-
- // Mask out the reserved registers
- BitVector Reserved = getReservedRegs(MF);
- Allocatable &= Reserved.flip();
-
- return Allocatable;
-}
-
-static inline
-const TargetRegisterClass *firstCommonClass(const uint32_t *A,
- const uint32_t *B,
- const TargetRegisterInfo *TRI) {
- for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + CountTrailingZeros_32(Common));
- return 0;
-}
-
-const TargetRegisterClass *
-TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) const {
- // First take care of the trivial cases.
- if (A == B)
- return A;
- if (!A || !B)
- return 0;
-
- // Register classes are ordered topologically, so the largest common
- // sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
-}
-
-const TargetRegisterClass *
-TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned Idx) const {
- assert(A && B && "Missing register class");
- assert(Idx && "Bad sub-register index");
-
- // Find Idx in the list of super-register indices.
- for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
- if (RCI.getSubReg() == Idx)
- // The bit mask contains all register classes that are projected into B
- // by Idx. Find a class that is also a sub-class of A.
- return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
- return 0;
-}
-
-const TargetRegisterClass *TargetRegisterInfo::
-getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
- const TargetRegisterClass *RCB, unsigned SubB,
- unsigned &PreA, unsigned &PreB) const {
- assert(RCA && SubA && RCB && SubB && "Invalid arguments");
-
- // Search all pairs of sub-register indices that project into RCA and RCB
- // respectively. This is quadratic, but usually the sets are very small. On
- // most targets like X86, there will only be a single sub-register index
- // (e.g., sub_16bit projecting into GR16).
- //
- // The worst case is a register class like DPR on ARM.
- // We have indices dsub_0..dsub_7 projecting into that class.
- //
- // It is very common that one register class is a sub-register of the other.
- // Arrange for RCA to be the larger register so the answer will be found in
- // the first iteration. This makes the search linear for the most common
- // case.
- const TargetRegisterClass *BestRC = 0;
- unsigned *BestPreA = &PreA;
- unsigned *BestPreB = &PreB;
- if (RCA->getSize() < RCB->getSize()) {
- std::swap(RCA, RCB);
- std::swap(SubA, SubB);
- std::swap(BestPreA, BestPreB);
- }
-
- // Also terminate the search once we have found a register class as small as
- // RCA.
- unsigned MinSize = RCA->getSize();
-
- for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
- unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
- for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
- // Check if a common super-register class exists for this index pair.
- const TargetRegisterClass *RC =
- firstCommonClass(IA.getMask(), IB.getMask(), this);
- if (!RC || RC->getSize() < MinSize)
- continue;
-
- // The indexes must compose identically: PreA+SubA == PreB+SubB.
- unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
- if (FinalA != FinalB)
- continue;
-
- // Is RC a better candidate than BestRC?
- if (BestRC && RC->getSize() >= BestRC->getSize())
- continue;
-
- // Yes, RC is the smallest super-register seen so far.
- BestRC = RC;
- *BestPreA = IA.getSubReg();
- *BestPreB = IB.getSubReg();
-
- // Bail early if we reached MinSize. We won't find a better candidate.
- if (BestRC->getSize() == MinSize)
- return BestRC;
- }
- }
- return BestRC;
-}
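The deleted firstCommonClass above scans two packed register-class bit masks one 32-bit word at a time and returns the class with the smallest ID set in both. A standalone model of that scan, with a portable helper replacing LLVM's CountTrailingZeros_32:

#include <cstdint>

// Portable stand-in for CountTrailingZeros_32; V must be non-zero.
static int countTrailingZeros32(uint32_t V) {
  int N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

// Scan packed masks 32 classes at a time; return the first (smallest)
// index set in both A and B, or -1 if the masks do not intersect.
int firstCommonIndex(const uint32_t *A, const uint32_t *B,
                     unsigned NumClasses) {
  for (unsigned I = 0; I < NumClasses; I += 32)
    if (uint32_t Common = *A++ & *B++)
      return int(I + countTrailingZeros32(Common));
  return -1;
}

With word-1 masks 0x30 and 0x90, the common bit 0x10 yields index 36; because classes are ordered topologically, that smallest common index is the largest common sub-class.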
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 59ffdea00ea6..af0cef62d552 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -22,6 +22,10 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
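With the default above returning false, a backend would opt in to the MachineScheduler by overriding the new hook; "MySubtarget" is illustrative, not an in-tree target, and the pre-C++11 tree style means "virtual" rather than "override":

#include "llvm/Target/TargetSubtargetInfo.h"

class MySubtarget : public llvm::TargetSubtargetInfo {
public:
  virtual bool enableMachineScheduler() const { return true; }
};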
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
deleted file mode 100644
index b36e6f858f72..000000000000
--- a/lib/Target/TargetTransformImpl.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetLowering.h"
-#include <utility>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by scalar transformations.
-//
-//===----------------------------------------------------------------------===//
-
-bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
- return TLI->isLegalAddImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
- return TLI->isLegalICmpImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- return TLI->isLegalAddressingMode(AM, Ty);
-}
-
-bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return TLI->isTruncateFree(Ty1, Ty2);
-}
-
-bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
- EVT T = TLI->getValueType(Ty);
- return TLI->isTypeLegal(T);
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
- return TLI->getJumpBufAlignment();
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
- return TLI->getJumpBufSize();
-}
-
-bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
- return TLI->supportJumpTables() &&
- (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by the vectorizers.
-//
-//===----------------------------------------------------------------------===//
-int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
- enum InstructionOpcodes {
-#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
-#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
-#include "llvm/Instruction.def"
- };
- switch (static_cast<InstructionOpcodes>(Opcode)) {
- case Ret: return 0;
- case Br: return 0;
- case Switch: return 0;
- case IndirectBr: return 0;
- case Invoke: return 0;
- case Resume: return 0;
- case Unreachable: return 0;
- case Add: return ISD::ADD;
- case FAdd: return ISD::FADD;
- case Sub: return ISD::SUB;
- case FSub: return ISD::FSUB;
- case Mul: return ISD::MUL;
- case FMul: return ISD::FMUL;
- case UDiv: return ISD::UDIV;
- case SDiv: return ISD::SDIV;
- case FDiv: return ISD::FDIV;
- case URem: return ISD::UREM;
- case SRem: return ISD::SREM;
- case FRem: return ISD::FREM;
- case Shl: return ISD::SHL;
- case LShr: return ISD::SRL;
- case AShr: return ISD::SRA;
- case And: return ISD::AND;
- case Or: return ISD::OR;
- case Xor: return ISD::XOR;
- case Alloca: return 0;
- case Load: return ISD::LOAD;
- case Store: return ISD::STORE;
- case GetElementPtr: return 0;
- case Fence: return 0;
- case AtomicCmpXchg: return 0;
- case AtomicRMW: return 0;
- case Trunc: return ISD::TRUNCATE;
- case ZExt: return ISD::ZERO_EXTEND;
- case SExt: return ISD::SIGN_EXTEND;
- case FPToUI: return ISD::FP_TO_UINT;
- case FPToSI: return ISD::FP_TO_SINT;
- case UIToFP: return ISD::UINT_TO_FP;
- case SIToFP: return ISD::SINT_TO_FP;
- case FPTrunc: return ISD::FP_ROUND;
- case FPExt: return ISD::FP_EXTEND;
- case PtrToInt: return ISD::BITCAST;
- case IntToPtr: return ISD::BITCAST;
- case BitCast: return ISD::BITCAST;
- case ICmp: return ISD::SETCC;
- case FCmp: return ISD::SETCC;
- case PHI: return 0;
- case Call: return 0;
- case Select: return ISD::SELECT;
- case UserOp1: return 0;
- case UserOp2: return 0;
- case VAArg: return 0;
- case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
- case InsertElement: return ISD::INSERT_VECTOR_ELT;
- case ShuffleVector: return ISD::VECTOR_SHUFFLE;
- case ExtractValue: return ISD::MERGE_VALUES;
- case InsertValue: return ISD::MERGE_VALUES;
- case LandingPad: return 0;
- }
-
- llvm_unreachable("Unknown instruction type encountered!");
-}
-
-std::pair<unsigned, MVT>
-VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
-
- LLVMContext &C = Ty->getContext();
- EVT MTy = TLI->getValueType(Ty);
-
- unsigned Cost = 1;
- // We keep legalizing the type until we find a legal kind. We assume that
- // the only operation that costs anything is the split. After splitting
- // we need to handle two types.
- while (true) {
- TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy);
-
- if (LK.first == TargetLowering::TypeLegal)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- if (LK.first == TargetLowering::TypeSplitVector ||
- LK.first == TargetLowering::TypeExpandInteger)
- Cost *= 2;
-
- // Keep legalizing the type.
- MTy = LK.second;
- }
-}
-
-unsigned
-VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
- bool Insert,
- bool Extract) const {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
-unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Check if any of the operands are vector operands.
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Else, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // Return the cost of multiple scalar invocations plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst);
-
- // Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- // Just check the op cost. If the operation is legal then assume it costs 1.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return 1;
-
- // Assume that illegal scalar instructions are expensive.
- return 4;
- }
-
- // Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
-
- // If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
-
- // Assume that Zext is done using AND.
- if (Opcode == Instruction::ZExt)
- return 1;
-
- // Assume that sext is done using SHL and SRA.
- if (Opcode == Instruction::SExt)
- return 2;
-
- // Just check the op cost. If the operation is legal then assume it costs
- // 1 and multiply by the type-legalization overhead.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return SrcLT.first * 1;
- }
-
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
-
- // Return the cost of multiple scalar invocations plus the cost of
- // inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
- }
-
- // We already handled vector-to-vector and scalar-to-scalar conversions. This
- // is where we handle bitcast between vectors and scalars. We need to assume
- // that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
- // Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
- (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
-
- llvm_unreachable("Unhandled cast");
- }
-
-unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- // Selects on vectors are actually vector selects.
- if (ISD == ISD::SELECT) {
- assert(CondTy && "CondTy must exist");
- if (CondTy->isVectorTy())
- ISD = ISD::VSELECT;
- }
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Otherwise, assume that the cast is scalarized.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
- if (CondTy)
- CondTy = CondTy->getScalarType();
- unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- CondTy);
-
- // Return the cost of multiple scalar invocations plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
- }
-
- // Unknown scalar opcode.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
- Type *Val,
- unsigned Index) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
- Type *Ty2) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
-
- // Assume that all loads of legal types cost 1.
- return LT.first;
-}
-
-unsigned
-VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
- return LT.first;
-}
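The deleted getTypeLegalizationCost above doubles the cost each time the legalizer splits a vector or expands an integer. A self-contained model of that loop, assuming an illustrative 128-bit widest legal vector:

#include <utility>

// Halve an illegal vector until it fits the widest legal register,
// doubling the cost at each split; returns {cost, legalized width}.
std::pair<unsigned, unsigned> legalizationCost(unsigned VectorBits) {
  const unsigned LegalBits = 128; // assumed widest legal vector width
  unsigned Cost = 1;
  while (VectorBits > LegalBits) {
    VectorBits /= 2;
    Cost *= 2;
  }
  return std::make_pair(Cost, VectorBits);
}

legalizationCost(512) returns {4, 128}: two splits, each doubling the cost, which matches the "only the split costs anything" assumption in the original comment.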
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 47489bb06c4e..54204d4b6390 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMX86AsmParser
- X86AsmLexer.cpp
X86AsmParser.cpp
)
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
deleted file mode 100644
index 66ad35370936..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallVector.h"
-
-using namespace llvm;
-
-namespace {
-
-class X86AsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- bool tentativeIsValid;
- AsmToken tentativeToken;
-
- const AsmToken &lexTentative() {
- tentativeToken = getLexer()->Lex();
- tentativeIsValid = true;
- return tentativeToken;
- }
-
- const AsmToken &lexDefinite() {
- if (tentativeIsValid) {
- tentativeIsValid = false;
- return tentativeToken;
- }
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenATT();
- AsmToken LexTokenIntel();
-protected:
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenATT();
- case 1:
- return LexTokenIntel();
- }
- }
-public:
- X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
- }
-};
-
-} // end anonymous namespace
-
-#define GET_REGISTER_MATCHER
-#include "X86GenAsmMatcher.inc"
-
-AsmToken X86AsmLexer::LexTokenATT() {
- AsmToken lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
-
- case AsmToken::Percent: {
- const AsmToken &nextToken = lexTentative();
- if (nextToken.getKind() != AsmToken::Identifier)
- return lexedToken;
-
- if (unsigned regID = MatchRegisterName(nextToken.getString())) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
-
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(regID));
- }
-
- // Matching the register name failed. If this is "db[0-7]", match it as an alias
- // for dr[0-7].
- if (nextToken.getString().size() == 3 &&
- nextToken.getString().startswith("db")) {
- int RegNo = -1;
- switch (nextToken.getString()[2]) {
- case '0': RegNo = X86::DR0; break;
- case '1': RegNo = X86::DR1; break;
- case '2': RegNo = X86::DR2; break;
- case '3': RegNo = X86::DR3; break;
- case '4': RegNo = X86::DR4; break;
- case '5': RegNo = X86::DR5; break;
- case '6': RegNo = X86::DR6; break;
- case '7': RegNo = X86::DR7; break;
- }
-
- if (RegNo != -1) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(RegNo));
- }
- }
-
-
- return lexedToken;
- }
- }
-}
-
-AsmToken X86AsmLexer::LexTokenIntel() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch(lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
- case AsmToken::Identifier: {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- return lexedToken;
- }
- }
-}
-
-extern "C" void LLVMInitializeX86AsmLexer() {
- RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
- RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
-}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ce446e75737c..e4623228b397 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -8,21 +8,22 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,11 +58,18 @@ private:
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
- X86Operand *ParseIntelTypeOperator(SMLoc StartLoc);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
+ X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+ SMLoc StartLoc);
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
+ unsigned Size);
+ X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
SmallString<64> &Err);
@@ -168,31 +176,35 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
+ bool AddressOf;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -230,11 +242,6 @@ struct X86Operand : public MCParsedAsmOperand {
return Imm.Val;
}
- bool needAsmRewrite() const {
- assert(Kind == Immediate && "Invalid access!");
- return Imm.NeedAsmRewrite;
- }
-
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -331,18 +338,12 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
- unsigned getMemSize() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.Size;
- }
-
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
- bool needSizeDirective() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.NeedSizeDir;
+ bool needAddressOf() const {
+ return AddressOf;
}
bool isMem() const { return Kind == Memory; }
@@ -463,7 +464,7 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
- SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
@@ -471,24 +472,24 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
- bool NeedRewrite = true){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
- Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false){
+ unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -496,7 +497,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
@@ -504,7 +505,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -519,7 +520,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
};
@@ -558,10 +559,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
Parser.Lex(); // Eat percent token.
const AsmToken &Tok = Parser.getTok();
+ EndLoc = Tok.getEndLoc();
+
if (Tok.isNot(AsmToken::Identifier)) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
@@ -582,13 +585,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
X86II::isX86_64ExtendedReg(RegNo))
return Error(StartLoc, "register %"
+ Tok.getString() + " is only available in 64-bit mode",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
- EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
@@ -615,11 +617,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
return false;
}
+ EndLoc = Parser.getTok().getEndLoc();
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -636,7 +640,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
}
if (RegNo != 0) {
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
@@ -645,10 +649,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
- EndLoc = Tok.getEndLoc();
Parser.Lex(); // Eat identifier token.
return false;
}
@@ -673,115 +676,354 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+enum IntelBracExprState {
+ IBES_START,
+ IBES_LBRAC,
+ IBES_RBRAC,
+ IBES_REGISTER,
+ IBES_REGISTER_STAR,
+ IBES_REGISTER_STAR_INTEGER,
+ IBES_INTEGER,
+ IBES_INTEGER_STAR,
+ IBES_INDEX_REGISTER,
+ IBES_IDENTIFIER,
+ IBES_DISP_EXPR,
+ IBES_MINUS,
+ IBES_ERROR
+};
+
+class IntelBracExprStateMachine {
+ IntelBracExprState State;
+ unsigned BaseReg, IndexReg, Scale;
+ int64_t Disp;
+
+ unsigned TmpReg;
+ int64_t TmpInteger;
+
+ bool isPlus;
+
+public:
+ IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
+ State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
+ TmpReg(0), TmpInteger(0), isPlus(true) {}
+
+ unsigned getBaseReg() { return BaseReg; }
+ unsigned getIndexReg() { return IndexReg; }
+ unsigned getScale() { return Scale; }
+ int64_t getDisp() { return Disp; }
+ bool isValidEndState() { return State == IBES_RBRAC; }
+
+ void onPlus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = true;
+ }
+ void onMinus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_MINUS;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = false;
+ }
+ void onRegister(unsigned Reg) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_REGISTER;
+ TmpReg = Reg;
+ break;
+ case IBES_INTEGER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = Reg;
+ Scale = TmpInteger;
+ break;
+ }
+ }
+ void onDispExpr() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_DISP_EXPR;
+ break;
+ }
+ }
+ void onInteger(int64_t TmpInt) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_MINUS:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_REGISTER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = TmpReg;
+ Scale = TmpInt;
+ break;
+ }
+ }
+ void onStar() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_INTEGER_STAR;
+ break;
+ case IBES_REGISTER:
+ State = IBES_REGISTER_STAR;
+ break;
+ }
+ }
+ void onLBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_RBRAC:
+ State = IBES_START;
+ isPlus = true;
+ break;
+ }
+ }
+ void onRBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_DISP_EXPR:
+ State = IBES_RBRAC;
+ break;
+ case IBES_INTEGER:
+ State = IBES_RBRAC;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_RBRAC;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_RBRAC;
+ break;
+ }
+ }
+};
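To see the bracketed-expression state machine above in action, here is a walk-through sketch for the token stream of "[eax + 4*ebx + 16]"; Parser is the enclosing MCAsmParser, and the register numbers 1 and 2 stand in for eax/ebx:

IntelBracExprStateMachine SM(Parser, /*ImmDisp=*/0);
SM.onRegister(1);   // eax              -> IBES_REGISTER
SM.onPlus();        // '+': eax becomes BaseReg
SM.onInteger(4);    // 4                -> IBES_INTEGER
SM.onStar();        // '*'              -> IBES_INTEGER_STAR
SM.onRegister(2);   // ebx: IndexReg=ebx, Scale=4
SM.onPlus();        // '+'              -> IBES_START
SM.onInteger(16);   // 16               -> IBES_INTEGER
SM.onRBrac();       // ']': Disp += 16, valid end state
// Now getBaseReg()==1, getIndexReg()==2, getScale()==4, getDisp()==16.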
+
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ uint64_t ImmDisp,
unsigned Size) {
- unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End;
-
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
- // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
+ SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
// Eat '['
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
+ unsigned TmpReg = 0;
+
+ // Try to handle '[' 'Symbol' ']'
if (getLexer().is(AsmToken::Identifier)) {
- // Parse BaseReg
- if (ParseRegister(BaseReg, Start, End)) {
- // Handle '[' 'symbol' ']'
- if (getParser().ParseExpression(Disp, End)) return 0;
+ if (ParseRegister(TmpReg, Start, End)) {
+ const MCExpr *Disp;
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(Start, "Expected ']' token!");
- Parser.Lex();
- End = Tok.getLoc();
- return X86Operand::CreateMem(Disp, Start, End, Size);
- }
- } else if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Tok.getIntVal();
+ return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
+
+ // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
+ if (ImmDisp)
+ return ErrorOperand(Start, "Unsupported immediate displacement!");
+
+ // Adjust the EndLoc due to the ']'.
+ End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
- SMLoc Loc = Tok.getLoc();
- if (getLexer().is(AsmToken::RBrac)) {
- // Handle '[' number ']'
- Parser.Lex();
- End = Tok.getLoc();
- const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
- if (SegReg)
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
- Start, End, Size);
+ if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- } else if (getLexer().is(AsmToken::Star)) {
- // Handle '[' Scale*IndexReg ']'
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else
- return ErrorOperand(Loc, "Unexpected token");
- }
- // Parse ][ as a plus.
- bool ExpectRBrac = true;
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- Parser.Lex();
- End = Tok.getLoc();
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
+ }
}
- if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- bool isPlus = getLexer().is(AsmToken::Plus) ||
- getLexer().is(AsmToken::LBrac);
- Parser.Lex();
- SMLoc PlusLoc = Tok.getLoc();
- if (getLexer().is(AsmToken::Integer)) {
+ // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
+ // immediate displacement before the bracketed expression.
+ bool Done = false;
+ IntelBracExprStateMachine SM(Parser, ImmDisp);
+
+ // If we parsed a register, then the end loc has already been set and
+ // the identifier has already been lexed. We also need to update the
+ // state.
+ if (TmpReg)
+ SM.onRegister(TmpReg);
+
+ const MCExpr *Disp = 0;
+ while (!Done) {
+ bool UpdateLocLex = true;
+
+ // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
+    // identifier. Don't try to parse it as a register.
+ if (Tok.getString().startswith("."))
+ break;
+
+ switch (getLexer().getKind()) {
+ default: {
+ if (SM.isValidEndState()) {
+ Done = true;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ }
+ case AsmToken::Identifier: {
+ // This could be a register or a displacement expression.
+    if (!ParseRegister(TmpReg, Start, End)) {
+ SM.onRegister(TmpReg);
+ UpdateLocLex = false;
+ break;
+ } else if (!getParser().parseExpression(Disp, End)) {
+ SM.onDispExpr();
+ UpdateLocLex = false;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ }
+ case AsmToken::Integer: {
int64_t Val = Tok.getIntVal();
- Parser.Lex();
- if (getLexer().is(AsmToken::Star)) {
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else if (getLexer().is(AsmToken::RBrac)) {
- const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
- Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
- } else
- return ErrorOperand(PlusLoc, "unexpected token after +");
- } else if (getLexer().is(AsmToken::Identifier)) {
- // This could be an index register or a displacement expression.
- End = Tok.getLoc();
- if (!IndexReg)
- ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ SM.onInteger(Val);
+ break;
}
- }
-
- // Parse ][ as a plus.
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- Parser.Lex();
- End = Tok.getLoc();
- if (getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- Parser.Lex();
- if (getParser().ParseExpression(Disp, End))
- return 0;
+ case AsmToken::Plus: SM.onPlus(); break;
+ case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Star: SM.onStar(); break;
+ case AsmToken::LBrac: SM.onLBrac(); break;
+ case AsmToken::RBrac: SM.onRBrac(); break;
+ }
+ if (!Done && UpdateLocLex) {
+ End = Tok.getLoc();
+ Parser.Lex(); // Consume the token.
}
- } else if (ExpectRBrac) {
- if (getParser().ParseExpression(Disp, End))
- return 0;
}
- if (ExpectRBrac) {
- if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(End, "expected ']' token!");
- Parser.Lex();
- End = Tok.getLoc();
- }
+ if (!Disp)
+ Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
@@ -790,22 +1032,73 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (ParseIntelDotOperator(Disp, &NewDisp, Err))
return ErrorOperand(Tok.getLoc(), Err);
+ End = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
}
- End = Tok.getLoc();
+ int BaseReg = SM.getBaseReg();
+ int IndexReg = SM.getIndexReg();
// handle [-42]
- if (!BaseReg && !IndexReg)
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!BaseReg && !IndexReg) {
+ if (!SegReg)
+ return X86Operand::CreateMem(Disp, Start, End);
+ else
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ }
+ int Scale = SM.getScale();
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
Start, End, Size);
}
+// Inline assembly may use variable names with namespace alias qualifiers.
+X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart) {
+ // We should only see Foo::Bar if we're parsing inline assembly.
+ if (!isParsingInlineAsm())
+ return 0;
+
+ // If we don't see a ':' then there can't be a qualifier.
+ if (getLexer().isNot(AsmToken::Colon))
+ return 0;
+
+ bool Done = false;
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc IdentEnd = Tok.getEndLoc();
+ while (!Done) {
+ switch (getLexer().getKind()) {
+ default:
+ Done = true;
+ break;
+ case AsmToken::Colon:
+ getLexer().Lex(); // Consume ':'.
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+ getLexer().Lex(); // Consume second ':'.
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
+ break;
+ case AsmToken::Identifier:
+ IdentEnd = Tok.getEndLoc();
+ getLexer().Lex(); // Consume the identifier.
+ break;
+ }
+ }
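+  // Rebuild the fully qualified name from the raw source range and wrap it
+  // in a symbol reference.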
+ size_t Len = IdentEnd.getPointer() - IdentStart.getPointer();
+ StringRef Identifier(IdentStart.getPointer(), Len);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ return 0;
+}
+
/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
+ uint64_t ImmDisp,
+ SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -817,8 +1110,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex();
}
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::Integer)) {
+ const AsmToken &IntTok = Parser.getTok();
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+ IntTok.getLoc()));
+ uint64_t ImmDisp = IntTok.getIntVal();
+ Parser.Lex(); // Eat the integer.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ }
+
if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
if (!ParseRegister(SegReg, Start, End)) {
// Handle SegReg : [ ... ]
@@ -827,32 +1133,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex(); // Eat :
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
}
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
- if (getParser().ParseExpression(Disp, End)) return 0;
- End = Parser.getTok().getLoc();
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
- bool NeedSizeDir = false;
- if (!Size && isParsingInlineAsm()) {
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
- NeedSizeDir = Size > 0;
- }
- }
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else
- // When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size, NeedSizeDir);
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
}
/// Parse the '.' operator.
@@ -918,11 +1213,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return ErrorOperand(Start, "Unable to parse expression!");
- End = Parser.getTok().getLoc();
-
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -930,13 +1223,23 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+  return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+ OffsetOfLoc);
}
-/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
-/// C++ type or variable. If the variable is an array, TYPE returns the size of
-/// a single element of the array.
-X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
+enum IntelOperatorKind {
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE
+};
+
+/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZE operator returns the size of a C or C++
+/// variable. A variable's size is the product of its LENGTH and TYPE. The
+/// TYPE operator returns the size of a C or C++ type or variable. If the
+/// variable is an array, TYPE returns the size of a single element.
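+/// For example, given "int arr[4]" with 4-byte ints, LENGTH arr is 4,
+/// TYPE arr is 4, and SIZE arr is 16.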
+X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
SMLoc TypeLoc = Start;
Parser.Lex(); // Eat the operator.
Start = Parser.getTok().getLoc();
@@ -944,76 +1247,92 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
- End = Parser.getTok().getLoc();
-
- unsigned Size = 0;
+ unsigned Length = 0, Size = 0, Type = 0;
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other than an
// identifier.
// FIXME: Pass a valid SMLoc.
- if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
- return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
-
- Size /= 8; // Size is in terms of bits, but we want bytes in the context.
+ bool IsVarDecl;
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
+ Size, Type, IsVarDecl))
+ return ErrorOperand(Start, "Unable to lookup expr!");
+ }
+ unsigned CVal;
+  switch (OpKind) {
+ default: llvm_unreachable("Unexpected operand kind!");
+ case IOK_LENGTH: CVal = Length; break;
+ case IOK_SIZE: CVal = Size; break;
+ case IOK_TYPE: CVal = Type; break;
}
// Rewrite the type operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
- const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
- return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+ const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
+ return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
-
- // offset operator.
StringRef AsmTokStr = Parser.getTok().getString();
- if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
- isParsingInlineAsm())
- return ParseIntelOffsetOfOperator(Start);
-
- // Type directive.
- if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
- isParsingInlineAsm())
- return ParseIntelTypeOperator(Start);
-
- // Unsupported directives.
- if (isParsingIntelSyntax() &&
- (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
- AsmTokStr == "length" || AsmTokStr == "LENGTH"))
- return ErrorOperand(Start, "Unsupported directive!");
-
- // immediate.
+
+ // Offset, length, type and size operators.
+ if (isParsingInlineAsm()) {
+ if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ return ParseIntelOffsetOfOperator(Start);
+ if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
+ return ParseIntelOperator(Start, IOK_LENGTH);
+ if (AsmTokStr == "size" || AsmTokStr == "SIZE")
+ return ParseIntelOperator(Start, IOK_SIZE);
+ if (AsmTokStr == "type" || AsmTokStr == "TYPE")
+ return ParseIntelOperator(Start, IOK_TYPE);
+ }
+
+ // Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
- if (!getParser().ParseExpression(Val, End)) {
- End = Parser.getTok().getLoc();
- return X86Operand::CreateImm(Val, Start, End);
+ bool isInteger = getLexer().is(AsmToken::Integer);
+ if (!getParser().parseExpression(Val, End)) {
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ // Immediate.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return X86Operand::CreateImm(Val, Start, End);
+
+ // Only positive immediates are valid.
+ if (!isInteger) {
+ Error(Parser.getTok().getLoc(), "expected a positive immediate "
+ "displacement before bracketed expr.");
+ return 0;
+ }
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
+ return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
}
}
- // register
+ // Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
+ return X86Operand::CreateReg(RegNo, Start, End);
getParser().Lex(); // Eat the colon.
- return ParseIntelMemOperand(RegNo, Start);
+ return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
}
- // mem operand
- return ParseIntelMemOperand(0, Start);
+ // Memory operand.
+ return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1037,7 +1356,6 @@ X86Operand *X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -1046,7 +1364,7 @@ X86Operand *X86AsmParser::ParseATTOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
return X86Operand::CreateImm(Val, Start, End);
}
@@ -1064,7 +1382,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
- if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+ if (getParser().parseExpression(Disp, ExprEnd)) return 0;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
@@ -1090,7 +1408,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc ExprEnd;
// It must be an parenthesized expression, parse it now.
- if (getParser().ParseParenExpression(Disp, ExprEnd))
+ if (getParser().parseParenExpression(Disp, ExprEnd))
return 0;
// After parsing the base expression we could either have a parenthesized
@@ -1150,7 +1468,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ if (getParser().parseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
return 0;
}
@@ -1169,7 +1487,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
- if (getParser().ParseAbsoluteExpression(Value))
+ if (getParser().parseAbsoluteExpression(Value))
return 0;
if (Value != 1)
@@ -1183,7 +1501,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
return 0;
}
- SMLoc MemEnd = Parser.getTok().getLoc();
+ SMLoc MemEnd = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ')'.
// If we have both a base register and an index register make sure they are
@@ -1310,7 +1628,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -1321,14 +1639,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -1509,245 +1827,78 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
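+// Rewrite an accumulator-immediate ALU instruction into its sign-extended
+// imm8 form (e.g., AND16i16 -> AND16ri8). CMP variants have no destination
+// register, so they take one register operand fewer.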
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
+static const char *getSubtargetFeatureName(unsigned Val);
bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -1809,10 +1960,21 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing.
+ std::string Msg = "instruction requires:";
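+    // ErrorInfo is a bit mask of the missing subtarget features; append the
+    // name of each feature that is set.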
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ }
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1843,19 +2005,32 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
+ unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match1 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match2 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[2];
Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match3 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[3];
Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match4 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
Op->setTokenValue(Base);
@@ -1936,9 +2111,16 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
+ if (ErrorInfoMissingFeature & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
@@ -1986,10 +2168,10 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -2027,16 +2209,13 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
return false;
}
-
-extern "C" void LLVMInitializeX86AsmLexer();
-
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
- LLVMInitializeX86AsmLexer();
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index f4d03a602cf5..d14899d28a23 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -10,7 +10,6 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(X86CommonTableGen)
set(sources
@@ -26,11 +25,13 @@ set(sources
X86JITInfo.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
+ X86PadShortFunction.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
+ X86TargetTransformInfo.cpp
X86VZeroUpper.cpp
)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f13692739a17..ca6f80ce3e58 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -16,11 +16,9 @@
#include "X86Disassembler.h"
#include "X86DisassemblerDecoder.h"
-
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -33,7 +31,6 @@
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "X86GenInstrInfo.inc"
-#include "X86GenEDInfo.inc"
using namespace llvm;
using namespace llvm::X86Disassembler;
@@ -84,10 +81,6 @@ X86GenericDisassembler::~X86GenericDisassembler() {
delete MII;
}
-const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
- return instInfoX86;
-}
-
/// regionReader - a callback function that wraps the readByte method from
/// MemoryObject.
///
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 981701f52764..b92427a7e91a 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -95,8 +95,6 @@ class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
@@ -122,8 +120,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 85d8a991dd6e..e40edba6d689 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
const struct ContextDecision* decision = 0;
-
+
switch (type) {
case ONEBYTE:
decision = &ONEBYTE_SYM;
@@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type,
uint8_t opcode,
uint8_t modRM) {
const struct ModRMDecision* dec = 0;
-
+
switch (type) {
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
@@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type,
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
}
-
+
switch (dec->modrm_type) {
default:
debug("Corrupt table! Unknown modrm_type");
@@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
*/
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
-
+
if (!ret)
++(insn->readerCursor);
-
+
return ret;
}
@@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t)
*/
static void dbgprintf(struct InternalInstruction* insn,
const char* format,
- ...) {
+ ...) {
char buffer[256];
va_list ap;
-
+
if (!insn->dlog)
return;
-
+
va_start(ap, format);
(void)vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
-
+
insn->dlog(insn->dlogArg, buffer);
-
+
return;
}
@@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
-
+
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
-
+
dbgprintf(insn, "readPrefixes()");
-
+
while (isPrefix) {
prefixLocation = insn->readerCursor;
-
+
if (consumeByte(insn, &byte))
return -1;
/*
- * If the first byte is a LOCK prefix break and let it be disassembled
- * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
- * FIXME there is currently no way to get the disassembler to print the
- * lock prefix if it is not the first byte.
+ * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+ * break and let it be disassembled as a normal "instruction".
*/
- if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
- break;
-
+ if (insn->readerCursor - 1 == insn->startLocation
+ && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
+ uint8_t nextByte;
+ if (byte == 0xf0)
+ break;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
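+      /* In 64-bit mode a REX prefix may follow the f2/f3 byte; peek past it
+         so nextByte holds the byte that actually determines the opcode. */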
+ if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
+ if (consumeByte(insn, &nextByte))
+ return -1;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ unconsumeByte(insn);
+ }
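+      /* f2/f3 can be a mandatory prefix for an 0x0f-escaped opcode or for
+         0x90 (f3 0x90 is PAUSE); anything else means the prefix stands on
+         its own as an instruction. */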
+ if (nextByte != 0x0f && nextByte != 0x90)
+ break;
+ }
+
switch (byte) {
case 0xf0: /* LOCK */
case 0xf2: /* REPNE/REPNZ */
@@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
isPrefix = FALSE;
break;
}
-
+
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
-
+
insn->vexSize = 0;
-
+
if (byte == 0xc4) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
@@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
-
+
if (insn->vexSize == 3) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (wFromVEX3of3(insn->vexPrefix[2]) << 3)
| (rFromVEX2of3(insn->vexPrefix[1]) << 2)
| (xFromVEX2of3(insn->vexPrefix[1]) << 1)
| (bFromVEX2of3(insn->vexPrefix[1]) << 0);
}
-
+
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
}
}
else if (byte == 0xc5) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 2;
}
else {
unconsumeByte(insn);
}
-
+
if (insn->vexSize == 2) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (rFromVEX2of2(insn->vexPrefix[1]) << 2);
}
-
+
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
}
}
@@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
-
+
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
-
+
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
+
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->immediateSize = (hasOpSize ? 2 : 4);
}
}
-
+
return 0;
}
@@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
* @param insn - The instruction whose opcode is to be read.
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
*/
-static int readOpcode(struct InternalInstruction* insn) {
+static int readOpcode(struct InternalInstruction* insn) {
/* Determine the length of the primary opcode */
-
+
uint8_t current;
-
+
dbgprintf(insn, "readOpcode()");
-
+
insn->opcodeType = ONEBYTE;
-
+
if (insn->vexSize == 3)
{
switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
- return -1;
+ return -1;
case 0:
break;
case VEX_LOB_0F:
@@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F3A:
+ case VEX_LOB_0F3A:
insn->twoByteEscape = 0x0f;
insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
@@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
-
+
insn->twoByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_3A;
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A6;
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
-
+
insn->opcodeType = TWOBYTE;
}
}
-
+
/*
* At this point we have consumed the full opcode.
* Anything we consume from here on must be unconsumed.
*/
-
+
insn->opcode = current;
-
+
return 0;
}
@@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID,
struct InternalInstruction* insn,
uint8_t attrMask) {
BOOL hasModRMExtension;
-
+
uint8_t instructionClass;
instructionClass = contextForAttrs(attrMask);
-
+
hasModRMExtension = modRMRequired(insn->opcodeType,
instructionClass,
insn->opcode);
-
+
if (hasModRMExtension) {
if (readModRM(insn))
return -1;
-
+
*instructionID = decode(insn->opcodeType,
instructionClass,
insn->opcode,
@@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode,
0);
}
-
+
return 0;
}
@@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
*/
static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
-
+
for (i = 0;; i++) {
if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
@@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
}
/*
- * getID - Determines the ID of an instruction, consuming the ModR/M byte as
- * appropriate for extended and escape opcodes. Determines the attributes and
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
*
* @param insn - The instruction whose ID is to be determined.
@@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
-
+
dbgprintf(insn, "getID()");
-
+
attrMask = ATTR_NONE;
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
+
if (insn->vexSize) {
attrMask |= ATTR_VEX;
if (insn->vexSize == 3) {
switch (ppFromVEX3of3(insn->vexPrefix[2])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexSize == 2) {
switch (ppFromVEX2of2(insn->vexPrefix[1])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX2of2(insn->vexPrefix[1]))
attrMask |= ATTR_VEXL;
}
@@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
* conservative, but in the specific case where OpSize is present but not
* in the right place we check if there's a 16-bit operation.
*/
-
+
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
const char *specName, *specWithOpSizeName;
-
+
spec = specifierForUID(instructionID);
-
+
if (getIDWithAttrMask(&instructionIDWithOpsize,
insn,
attrMask | ATTR_OPSIZE)) {
- /*
+ /*
* ModRM required with OpSize but not present; give up and return version
* without OpSize set
*/
-
+
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
-
+
specName = x86DisassemblerGetInstrName(instructionID, miiArg);
specWithOpSizeName =
x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
@@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
const struct InstructionSpecifier *specWithNewOpcode;
spec = specifierForUID(instructionID);
-
+
/* Borrow opcode from one of the other XCHGar opcodes */
insn->opcode = 0x91;
-
+
if (getIDWithAttrMask(&instructionIDWithNewOpcode,
insn,
attrMask)) {
@@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
return 0;
}
-
+
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
-
+
return 0;
}
@@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) {
SIBIndex sibIndexBase = 0;
SIBBase sibBaseBase = 0;
uint8_t index, base;
-
+
dbgprintf(insn, "readSIB()");
-
+
if (insn->consumedSIB)
return 0;
-
+
insn->consumedSIB = TRUE;
-
+
switch (insn->addressSize) {
case 2:
dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
@@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) {
if (consumeByte(insn, &insn->sib))
return -1;
-
+
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
-
+
switch (index) {
case 0x4:
insn->sibIndex = SIB_INDEX_NONE;
@@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
}
-
+
switch (scaleFromSIB(insn->sib)) {
case 0:
insn->sibScale = 1;
@@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibScale = 8;
break;
}
-
+
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
-
+
switch (base) {
case 0x5:
switch (modFromModRM(insn->modRM)) {
@@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) {
break;
case 0x1:
insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x2:
insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x3:
@@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
-
+
return 0;
}
@@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) {
* readDisplacement - Consumes the displacement of an instruction.
*
* @param insn - The instruction whose displacement is to be read.
- * @return - 0 if the displacement byte was successfully read; nonzero
+ * @return - 0 if the displacement byte was successfully read; nonzero
* otherwise.
*/
-static int readDisplacement(struct InternalInstruction* insn) {
+static int readDisplacement(struct InternalInstruction* insn) {
int8_t d8;
int16_t d16;
int32_t d32;
-
+
dbgprintf(insn, "readDisplacement()");
-
+
if (insn->consumedDisplacement)
return 0;
-
+
insn->consumedDisplacement = TRUE;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
-
+
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
insn->consumedDisplacement = FALSE;
@@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
insn->displacement = d32;
break;
}
-
+
insn->consumedDisplacement = TRUE;
return 0;
}
@@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) {
* @param insn - The instruction whose addressing information is to be read.
* @return - 0 if the information was successfully read; nonzero otherwise.
*/
-static int readModRM(struct InternalInstruction* insn) {
+static int readModRM(struct InternalInstruction* insn) {
uint8_t mod, rm, reg;
-
+
dbgprintf(insn, "readModRM()");
-
+
if (insn->consumedModRM)
return 0;
-
+
if (consumeByte(insn, &insn->modRM))
return -1;
insn->consumedModRM = TRUE;
-
+
mod = modFromModRM(insn->modRM);
rm = rmFromModRM(insn->modRM);
reg = regFromModRM(insn->modRM);
-
+
/*
* This goes by insn->registerSize to pick the correct register, which messes
* up if we're using (say) XMM or 8-bit register operands. That gets fixed in
@@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) {
insn->eaRegBase = EA_REG_RAX;
break;
}
-
+
reg |= rFromREX(insn->rexPrefix) << 3;
rm |= bFromREX(insn->rexPrefix) << 3;
-
+
insn->reg = (Reg)(insn->regBase + reg);
-
+
switch (insn->addressSize) {
case 2:
insn->eaBaseBase = EA_BASE_BX_SI;
-
+
switch (mod) {
case 0x0:
if (rm == 0x6) {
@@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) {
case 4:
case 8:
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
-
+
switch (mod) {
case 0x0:
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
switch (rm) {
case 0x4:
case 0xc: /* in case REXW.b is set */
- insn->eaBase = (insn->addressSize == 4 ?
+ insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
if (readDisplacement(insn))
@@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) {
}
break;
} /* switch (insn->addressSize) */
-
+
return 0;
}
@@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
* @return - 0 if fixup was successful; -1 if the register returned was
* invalid for its class.
*/
-static int fixupReg(struct InternalInstruction *insn,
+static int fixupReg(struct InternalInstruction *insn,
const struct OperandSpecifier *op) {
uint8_t valid;
-
+
dbgprintf(insn, "fixupReg()");
-
+
switch ((OperandEncoding)op->encoding) {
default:
debug("Expected a REG or R/M encoding in fixupReg");
@@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn,
}
break;
}
-
+
return 0;
}
/*
- * readOpcodeModifier - Reads an operand from the opcode field of an
+ * readOpcodeModifier - Reads an operand from the opcode field of an
* instruction. Handles AddRegFrm instructions.
*
* @param insn - The instruction whose opcode field is to be read.
@@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn,
*/
static int readOpcodeModifier(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcodeModifier()");
-
+
if (insn->consumedOpcodeModifier)
return 0;
-
+
insn->consumedOpcodeModifier = TRUE;
-
+
switch (insn->spec->modifierType) {
default:
debug("Unknown modifier type.");
@@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) {
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
return 0;
- }
+ }
}
/*
- * readOpcodeRegister - Reads an operand from the opcode field of an
+ * readOpcodeRegister - Reads an operand from the opcode field of an
* instruction and interprets it appropriately given the operand width.
* Handles AddRegFrm instructions.
*
@@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
if (readOpcodeModifier(insn))
return -1;
-
+
if (size == 0)
size = insn->registerSize;
-
+
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
- if (insn->rexPrefix &&
+ if (insn->rexPrefix &&
insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ (insn->opcodeRegister - MODRM_REG_AL - 4));
}
-
+
break;
case 2:
insn->opcodeRegister = (Reg)(MODRM_REG_AX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 4:
insn->opcodeRegister = (Reg)(MODRM_REG_EAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 8:
- insn->opcodeRegister = (Reg)(MODRM_REG_RAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
}
-
+
return 0;
}
@@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
uint16_t imm16;
uint32_t imm32;
uint64_t imm64;
-
+
dbgprintf(insn, "readImmediate()");
-
+
if (insn->numImmediatesConsumed == 2) {
debug("Already consumed two immediates");
return -1;
}
-
+
if (size == 0)
size = insn->immediateSize;
else
insn->immediateSize = size;
insn->immediateOffset = insn->readerCursor - insn->startLocation;
-
+
switch (size) {
case 1:
if (consumeByte(insn, &imm8))
@@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
insn->immediates[insn->numImmediatesConsumed] = imm64;
break;
}
-
+
insn->numImmediatesConsumed++;
-
+
return 0;
}
@@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
*/
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
-
+
if (insn->vexSize == 3)
insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
else if (insn->vexSize == 2)
@@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
int sawRegImm = 0;
-
+
dbgprintf(insn, "readOperands()");
/* If non-zero vvvv specified, need to make sure one of the operands
uses it. */
hasVVVV = !readVVVV(insn);
needVVVV = hasVVVV && (insn->vvvv != 0);
-
+
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
@@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) {
/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
if (needVVVV) return -1;
-
+
return 0;
}
@@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) {
* decodeInstruction - Reads and interprets a full instruction provided by the
* user.
*
- * @param insn - A pointer to the instruction to be populated. Must be
+ * @param insn - A pointer to the instruction to be populated. Must be
* pre-allocated.
* @param reader - The function to be used to read the instruction's bytes.
* @param readerArg - A generic argument to be passed to the reader to store
@@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
-
+
insn->reader = reader;
insn->readerArg = readerArg;
insn->dlog = logger;
@@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->readerCursor = startLoc;
insn->mode = mode;
insn->numImmediatesConsumed = 0;
-
+
if (readPrefixes(insn) ||
readOpcode(insn) ||
getID(insn, miiArg) ||
@@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn,
return -1;
insn->operands = &x86OperandSets[insn->spec->operands][0];
-
+
insn->length = insn->readerCursor - insn->startLocation;
-
+
dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
startLoc, insn->readerCursor, insn->length);
-
+
if (insn->length > 15)
dbgprintf(insn, "Instruction exceeds 15-byte limit");
-
+
return 0;
}
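
decodeInstruction's early-out chain is plain short-circuit evaluation; a self-contained model of that control flow (the stage type and names here are stand-ins, not the decoder's):

/* Each stage returns 0 on success and nonzero on failure, mirroring the
   readPrefixes || readOpcode || getID || readOperands chain above. */
typedef int (*DecodeStage)(void *insn);

static int runPipeline(void *insn, const DecodeStage *stages, int n) {
  for (int i = 0; i < n; ++i)
    if (stages[i](insn))
      return -1; /* any failing stage aborts the whole decode */
  return 0;
}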
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index a4bd1147bc51..e357710b20eb 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -14,12 +14,12 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
-#include "X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
@@ -131,7 +131,7 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
- O << Op.getImm();
+ O << formatImm(Op.getImm());
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
// If a symbolic branch target was added as a constant expression then print
@@ -157,7 +157,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
O << markup("<imm:")
- << '$' << (int64_t)Op.getImm()
+ << '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
@@ -189,7 +189,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (DispSpec.isImm()) {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
- O << DispVal;
+ O << formatImm(DispVal);
} else {
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
O << *DispSpec.getExpr();
@@ -207,7 +207,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (ScaleVal != 1) {
O << ','
<< markup("<imm:")
- << ScaleVal
+ << ScaleVal // never printed in hex.
<< markup(">");
}
}
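
Routing immediates through formatImm is what lets the printer switch between decimal and hex output; a sketch of that switch (printHex is an assumed flag standing in for the printer's hex-immediate mode, not the real API):

#include <cstdint>
#include <sstream>
#include <string>

// Negative values keep their sign in front of the 0x prefix; the magnitude
// is computed in unsigned space so INT64_MIN stays well-defined.
std::string formatImmSketch(int64_t v, bool printHex) {
  std::ostringstream os;
  if (!printHex) { os << v; return os.str(); }
  uint64_t mag = v < 0 ? 0 - (uint64_t)v : (uint64_t)v;
  if (v < 0) os << '-';
  os << "0x" << std::hex << mag;
  return os.str();
}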
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 64ac5e685f76..0f6eeb19bccd 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
- DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
- break;
case X86::VINSERTPSrr:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVLHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVLHPSMask(2, ShuffleMask);
- break;
case X86::VMOVLHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVHLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVHLPSMask(2, ShuffleMask);
- break;
case X86::VMOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -69,6 +57,29 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::PALIGNR128rr:
+ case X86::VPALIGNR128rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PALIGNR128rm:
+ case X86::VPALIGNR128rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPALIGNR256rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPALIGNR256rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PSHUFDri:
case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -131,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKHBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKHBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHBWrm:
case X86::VPUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -154,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHWDrm:
case X86::VPUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -177,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHDQrm:
case X86::VPUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -200,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
case X86::VPUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -224,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKLBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKLBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLBWrm:
case X86::VPUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -247,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLWDrm:
case X86::VPUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -270,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLDQrm:
case X86::VPUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -293,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
case X86::VPUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -317,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPDrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPDrmi:
- DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPDrmi:
case X86::VSHUFPDrmi:
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -344,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPSrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPSrmi:
- DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPSrmi:
case X86::VSHUFPSrmi:
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -371,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::UNPCKLPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPDrm:
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPDrm:
case X86::VUNPCKLPDrm:
DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -394,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPSrm:
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPSrm:
case X86::VUNPCKLPSrm:
DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -417,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPDrm:
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPDrm:
case X86::VUNPCKHPDrm:
DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -440,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPSrm:
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPSrm:
case X86::VUNPCKHPSrm:
DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index d67aec7f10ef..141f4a4dd856 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -14,11 +14,11 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
-#include "X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 467edadc7e09..598ddee56d21 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -113,7 +113,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -255,7 +255,7 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// Relax if the value is too big for a (signed) i8.
return int64_t(Value) != int64_t(int8_t(Value));
@@ -279,9 +279,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.setOpcode(RelaxedOp);
}
-/// writeNopData - Write optimal nops to the output file for the \p Count
-/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \p Count is more than the maximum optimal nops.
+/// \brief Write a sequence of optimal nops to the output, covering \p Count
+/// bytes.
+/// \return true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
@@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
- // Write an optimal sequence for the first 15 bytes.
- const uint64_t OptimalCount = (Count < 16) ? Count : 15;
- const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
- for (uint64_t i = 0, e = Prefixes; i != e; i++)
- OW->Write8(0x66);
- const uint64_t Rest = OptimalCount - Prefixes;
- for (uint64_t i = 0, e = Rest; i != e; i++)
- OW->Write8(Nops[Rest - 1][i]);
-
- // Finish with single byte nops.
- for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
- OW->Write8(0x90);
+ // 15 is the longest single nop instruction. Emit as many 15-byte nops as
+ // needed, then emit a nop of the remaining length.
+ do {
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
+ for (uint8_t i = 0; i < Prefixes; i++)
+ OW->Write8(0x66);
+ const uint8_t Rest = ThisNopLength - Prefixes;
+ for (uint8_t i = 0; i < Rest; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+ Count -= ThisNopLength;
+ } while (Count != 0);
return true;
}
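
The rewritten loop caps each emitted nop at 15 bytes: up to five 0x66 prefixes stacked on the 10-byte base pattern, repeated until Count runs out. The per-chunk split, as a standalone sketch:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Returns (number of 0x66 prefixes, base-nop length) for each chunk; the
// base nop bytes themselves would come from a table like Nops[] above.
std::vector<std::pair<uint8_t, uint8_t>> nopChunks(uint64_t count) {
  std::vector<std::pair<uint8_t, uint8_t>> chunks;
  while (count != 0) {
    uint8_t len = (uint8_t)std::min<uint64_t>(count, 15);
    uint8_t prefixes = len <= 10 ? 0 : len - 10;
    chunks.push_back({prefixes, (uint8_t)(len - prefixes)});
    count -= len;
  }
  return chunks;
}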
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 7ea1961dec90..36695600707e 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -104,7 +104,7 @@ namespace X86II {
/// MO_TLSLD - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// __tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the x86-64 local dynamic TLS access model.
///
@@ -114,7 +114,7 @@ namespace X86II {
/// MO_TLSLDM - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// ___tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the IA32 local dynamic TLS access model.
///
@@ -276,9 +276,9 @@ namespace X86II {
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
- MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
- MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
- MRM_DE = 55, MRM_DF = 56,
+ MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
+ MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
+ MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -574,16 +574,13 @@ namespace X86II {
++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
return FirstMemOp;
}
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_E8: case X86II::MRM_F0:
- case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
case X86II::MRM_DE: case X86II::MRM_DF:
return -1;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 16488eb7ae7e..7815ae98c9bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
- PointerSize = 8;
+ PointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
@@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
void X86ELFMCAsmInfo::anchor() { }
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
- if (T.getArch() == Triple::x86_64)
- PointerSize = 8;
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+ // For ELF, x86-64 pointer size depends on the ABI.
+ // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
+ // with the x32 ABI, pointer size remains the default 4.
+ PointerSize = (is64Bit && !isX32) ? 8 : 4;
+
+ // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
+ CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
AssemblerDialect = AsmWriterFlavor;
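
The ABI rule this hunk encodes, reduced to two functions (the bool inputs are assumptions standing in for the Triple queries):

// x86-64 pointers are 8 bytes unless targeting the x32 ABI; callee-saved
// spill slots stay 8 bytes on any 64-bit target, x32 included.
unsigned pointerSize(bool is64Bit, bool isX32) {
  return (is64Bit && !isX32) ? 8 : 4;
}
unsigned calleeSaveStackSlotSize(bool is64Bit) { return is64Bit ? 8 : 4; }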
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204ae75c8..776cee1e35cc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
  // VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
@@ -1117,16 +1136,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
TSFlags, CurByte, OS, Fixups);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
@@ -1143,6 +1159,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
+ case X86II::MRM_D6: MRM = 0xD6; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
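
The MemOp4 bit shifts which source ends up in ModRM.rm; a minimal model of the MRMSrcReg operand walk above (operand indices are illustrative, not MI's real layout):

// dst -> ModRM.reg, src1 -> VEX.vvvv, optional src2 -> imm8[7:4] (FMA4),
// and the remaining source is the one encoded in ModRM.rm.
unsigned rmOperandIndex(bool hasMemOp4) {
  unsigned curOp = 0;
  ++curOp;                // dst(ModR/M.reg)
  ++curOp;                // src1(VEX_4V)
  if (hasMemOp4) ++curOp; // src2 rides in the immediate, skip it here
  return curOp;
}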
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 287c9f137a58..5e84530cd729 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#include "X86MCTargetDesc.h"
-#include "X86MCAsmInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "InstPrinter/X86IntelInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/Host.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
@@ -257,7 +257,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitX86MCRegisterInfo(X, RA,
X86_MC::getDwarfRegFlavour(TT, false),
- X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::getDwarfRegFlavour(TT, true),
+ RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7ff058edbc23..64f005c469bc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,18 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Object/MachOFormat.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 949661eb99e9..e518fecf044f 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -16,8 +16,7 @@ BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \
X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenDisassemblerTables.inc X86GenFastISel.inc \
- X86GenCallingConv.inc X86GenSubtargetInfo.inc \
- X86GenEDInfo.inc
+ X86GenCallingConv.inc X86GenSubtargetInfo.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 40110353fc62..496b704ee85f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should
turn into hsubpd also.
//===---------------------------------------------------------------------===//
+
+define <2 x i32> @foo(<2 x double> %in) {
+ %x = fptosi <2 x double> %in to <2 x i32>
+ ret <2 x i32> %x
+}
+
+Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 6a8a4fdf2520..b4285a071879 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1568,43 +1568,6 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
-
-Testcase:
-int a(int x) { return (x & 127) > 31; }
-
-Current output:
- movl 4(%esp), %eax
- andl $127, %eax
- cmpl $31, %eax
- seta %al
- movzbl %al, %eax
- ret
-
-Ideal output:
- xorl %eax, %eax
- testl $96, 4(%esp)
- setne %al
- ret
-
-This should definitely be done in instcombine, canonicalizing the range
-condition into a != condition. We get this IR:
-
-define i32 @a(i32 %x) nounwind readnone {
-entry:
- %0 = and i32 %x, 127 ; <i32> [#uses=1]
- %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1]
- %2 = zext i1 %1 to i32 ; <i32> [#uses=1]
- ret i32 %2
-}
-
-Instcombine prefers to strength reduce relational comparisons to equality
-comparisons when possible, this should be another case of that. This could
-be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
-looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already
-be redesigned to use ComputeMaskedBits and friends.
-
-
-//===---------------------------------------------------------------------===//
Testcase:
int x(int a) { return (a&0xf0)>>4; }
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 52a67f763b0a..815d23588f11 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 8b87c1f9c8ad..bbd490411f2d 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -61,6 +61,24 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodePALIGNRMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Offset;
+      // If i+offset is out of this lane, we actually need the other source.
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
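
A worked instance of DecodePALIGNRMask for the single-lane 128-bit byte case, kept self-contained by fixing the type parameters (v16i8, so the byte offset equals the immediate):

#include <vector>

// Indices >= 16 select from the second source. With a single lane, the
// Base += NumElts - NumLaneElts adjustment above adds zero, so the mask
// is simply i + imm for each lane element.
std::vector<int> palignrMaskV16i8(unsigned imm) {
  std::vector<int> mask;
  for (unsigned i = 0; i != 16; ++i)
    mask.push_back(i + imm);
  return mask; // imm = 4 -> 4..15, 16, 17, 18, 19
}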
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 70d8171a8154..017ab325ec51 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index dce5b4d2b008..1f9919f15955 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -63,11 +63,12 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
-/// which determines whether the frame pointer register should be
-/// reserved in case dynamic stack alignment is later required.
-///
-FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+/// \brief Creates an X86-specific Target Transformation Info pass.
+ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
+
+/// createX86PadShortFunctions - Return a pass that pads short functions
+/// with NOOPs. This will prevent a stall when returning on the Atom.
+FunctionPass *createX86PadShortFunctions();
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8ad0bc08ac57..1dcc344e7f0d 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,11 +120,25 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
+def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
+ "Support HLE">;
+def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
+ "Support ADX instructions">;
+def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+ "Support PRFCHW instructions">;
+def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+ "Support RDSEED instruction">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
- "HasSlowDivide", "true",
- "Use small divide for positive values less than 256">;
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+ "CallRegIndirect", "true",
+ "Call register indirect">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -138,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-class AtomProc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, AtomModel, Features>;
-
def : Proc<"generic", []>;
def : Proc<"i386", []>;
def : Proc<"i486", []>;
@@ -155,47 +166,63 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+ [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+ [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+ [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+ [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide,
+ FeatureCallRegIndirect,
+ FeaturePadShortFunctions]>;
+
// "Arrandale" along with corei3 and corei5
-def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES]>;
-def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT]>;
+def : ProcessorModel<"corei7", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"westmere", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+ FeaturePCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL,
- FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase,
- FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA,
- FeatureRTM]>;
+def : ProcessorModel<"core-avx2", HaswellModel,
+ [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+ FeatureHLE]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index fdd712520b44..6b228b0b0329 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,33 +13,33 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
- case X86II::MO_SECREL: O << "@SECREL"; break;
+ case X86II::MO_SECREL: O << "@SECREL32"; break;
}
}
@@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
case MachineOperand::MO_Immediate:
- O << '$' << MO.getImm();
+ if (AsmVariant == 0) O << '$';
+ O << MO.getImm();
return;
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
- O << '$';
+ if (AsmVariant == 0) O << '$';
printSymbolOperand(MO, O);
break;
}
@@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
NeedPlus = true;
}
- assert (DispSpec.isImm() && "Displacement is not an immediate!");
- int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
- if (NeedPlus) {
- if (DispVal > 0)
- O << " + ";
- else {
- O << " - ";
- DispVal = -DispVal;
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ printOperand(MI, Op+3, O, Modifier, AsmVariant);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
}
+ O << DispVal;
}
- O << DispVal;
- }
+ }
O << ']';
}
@@ -543,7 +548,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MCSA_IndirectSymbol);
      // hlt; hlt; hlt; hlt; hlt (hlt encodes as 0xf4).
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
- OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
@@ -569,7 +574,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long 0
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -578,8 +583,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// using NLPs. However, sometimes the types are local to the file. So
// we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -596,8 +600,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -663,7 +666,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
name += ",DATA";
else
name += ",data";
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
@@ -672,7 +675,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
else
name = " -export:";
name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
}
}
@@ -692,7 +695,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize());
}
Stubs.clear();
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 61eb14e036d0..bc7496bad144 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// AT&T assembly code printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
@@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "X86 AT&T-Style Assembly Printer";
+ return "X86 Assembly / Object Emitter";
}
const X86Subtarget &getSubtarget() const { return *Subtarget; }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index a5a8dc18e41d..0dfeb42f1a4d 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,8 +15,8 @@
#define X86COFF_MACHINEMODULEINFO_H
#include "X86MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6786756c7faf..9eafbd55a5ae 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_HiPE : CallingConv<[
+ // Promote all types to i32
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// X86-64 HiPE return-value convention.
+def RetCC_X86_64_HiPE : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
+]>;
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // If HiPE, use RetCC_X86_32_HiPE.
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
]>;
// This is the root return-value convention for the X86-64 backend.
def RetCC_X86_64 : CallingConv<[
+ // HiPE uses RetCC_X86_64_HiPE
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -254,29 +276,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
-// X86-64 Intel OpenCL built-ins calling convention.
-def CC_Intel_OCL_BI : CallingConv<[
- CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
-
- CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
- CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
-
- CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
- CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
-
- // The SSE vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
-
- // The 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v8f32, v4f64, v8i32, v4i64],
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
-
- CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
- CCDelegateTo<CC_X86_64_C>
-]>;
-
-
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -291,6 +290,18 @@ def CC_X86_64_GHC : CallingConv<[
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
+def CC_X86_64_HiPE : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -376,8 +387,8 @@ def CC_X86_32_ThisCall : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- // Pass sret arguments indirectly through EAX
- CCIfSRet<CCAssignToReg<[EAX]>>,
+ // Pass sret arguments indirectly through stack.
+ CCIfSRet<CCAssignToStack<4, 4>>,
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -422,6 +433,42 @@ def CC_X86_32_GHC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+def CC_X86_32_HiPE : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"is64Bit()", CCAssignToReg<[EDI, ESI, EDX, ECX]>>>,
+ CCIfType<[i64], CCIfSubtarget<"is64Bit()", CCAssignToReg<[RDI, RSI, RDX, RCX]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -432,6 +479,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -440,6 +488,7 @@ def CC_X86_32 : CallingConv<[
// This is the root argument convention for the X86-64 backend.
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -470,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
+ R11, R12, R13, R14, R15, RBP,
+ (sequence "XMM%u", 0, 15))>;
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 44db563818b1..2518e02e2a40 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -13,23 +13,23 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-emitter"
+#include "X86.h"
#include "X86InstrInfo.h"
#include "X86JITInfo.h"
+#include "X86Relocations.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "X86Relocations.h"
-#include "X86.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -124,7 +124,7 @@ template<class CodeEmitter>
} // end anonymous namespace.
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified templated MachineCodeEmitter object.
+/// to the specified JITCodeEmitter object.
FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
  // VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
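Editorial aside, not part of the patch: the MRMDestReg hunks above work because in a VEX_4V form the first source register travels in the VEX.vvvv field, so the ModR/M byte must pair the destination with the second source. A minimal sketch of that index bookkeeping, with a hypothetical operand layout:

// Sketch only: assumed operand order {dst, src1, src2} where src1 is
// carried by VEX.vvvv rather than by the ModR/M byte.
unsigned firstModRMSource(unsigned CurOp, bool HasVEX_4V) {
  unsigned SrcRegNum = CurOp + 1; // operand after the destination
  if (HasVEX_4V)                  // src1 is encoded in VEX.vvvv,
    ++SrcRegNum;                  // so ModR/M encodes src2 instead
  return SrcRegNum;
}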
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index f321778db24b..69b4c71651d7 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -11,7 +11,7 @@
;;
;;===----------------------------------------------------------------------===
-extrn X86CompilationCallback2: PROC
+extrn LLVMX86CompilationCallback2: PROC
.code
X86CompilationCallback proc
@@ -42,7 +42,7 @@ X86CompilationCallback proc
; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
; Restore all XMM arg registers.
movaps xmm3, [rsp+48+32]
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index d4627c74cb1c..cadec682a435 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -75,6 +75,8 @@ public:
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
+
#include "X86GenFastISel.inc"
private:
@@ -297,7 +299,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
case MVT::i32: Opc = X86::MOV32mi; break;
case MVT::i64:
// Must be a 32-bit sign extended value.
- if ((int)CI->getSExtValue() == CI->getSExtValue())
+ if (isInt<32>(CI->getSExtValue()))
Opc = X86::MOV64mi32;
break;
}
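Aside on the isInt<32> rewrite above: it replaces a truncate-and-compare idiom with an explicit signed-range check. A hedged restatement of what it computes (assuming the usual MathExtras.h semantics):

#include <cstdint>

// True iff V is representable as a sign-extended 32-bit immediate,
// i.e. V lies in [-2^31, 2^31 - 1].
bool fitsInSigned32(int64_t V) {
  return V >= -(INT64_C(1) << 31) && V < (INT64_C(1) << 31);
}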
@@ -326,12 +328,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
+ if (RR == 0)
return false;
+
+ ResultReg = RR;
+ return true;
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
@@ -727,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Don't handle popping bytes on return for now.
if (X86MFInfo->getBytesToPopOnReturn() != 0)
- return 0;
+ return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
@@ -738,10 +739,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -806,25 +809,30 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
- // and into %rax.
- if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+ // and into %rax. We also do the same with %eax for Win32.
+ if (F.hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
+ unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- X86::RAX).addReg(Reg);
- MRI.addLiveOut(X86::RAX);
+ RetReg).addReg(Reg);
+ RetRegs.push_back(RetReg);
}
// Now emit the RET.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
@@ -1373,7 +1381,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert(Len == 1);
VT = MVT::i8;
}
@@ -1517,6 +1524,81 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
}
+bool X86FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ if (!Subtarget->is64Bit())
+ return false;
+
+ // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 6)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ case MVT::i64:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ static const uint16_t GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const uint16_t GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
+ };
+
+ Idx = 0;
+ const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
+ const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
+ unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+}
+
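Usage illustration (hypothetical source, not from the patch): FastLowerArguments above accepts a plain C-convention x86-64 function whose arguments are up to six i32/i64 scalars without byval/inreg/sret/nest attributes, for example:

// All six parameters are integer scalars, so under the SysV x86-64 ABI
// they arrive in RDI, RSI, RDX, RCX, R8, R9 and can be mapped to vregs
// without touching the stack.
long sum6(long a, long b, long c, long d, long e, long f) {
  return a + b + c + d + e + f;
}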
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
@@ -1529,6 +1611,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
+ // Allow SelectionDAG isel to handle tail calls.
+ if (cast<CallInst>(I)->isTailCall())
+ return false;
+
return DoSelectCall(I, 0);
}
@@ -1541,9 +1627,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
CallingConv::ID CC = CS.getCallingConv();
if (CC == CallingConv::Fast || CC == CallingConv::GHC)
return 0;
- if (!CS.paramHasAttr(1, Attributes::StructRet))
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
return 0;
- if (CS.paramHasAttr(1, Attributes::InReg))
+ if (CS.paramHasAttr(1, Attribute::InReg))
return 0;
return 4;
}
@@ -1581,8 +1667,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
*FuncInfo.MF, FTy->isVarArg(),
Outs, FTy->getContext());
@@ -1622,12 +1707,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) {
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
PointerType *Ty = cast<PointerType>(ArgVal->getType());
Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
@@ -1641,9 +1726,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
return false;
}
- if (CS.paramHasAttr(AttrInd, Attributes::InReg))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
Flags.setInReg();
- if (CS.paramHasAttr(AttrInd, Attributes::Nest))
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
Flags.setNest();
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
@@ -1905,17 +1990,17 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
ComputeValueVTs(TLI, I->getType(), RetTys);
for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
EVT VT = RetTys[i];
- EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
for (unsigned j = 0; j != NumRegs; ++j) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = !CS.getInstruction()->use_empty();
- if (CS.paramHasAttr(0, Attributes::SExt))
+ if (CS.paramHasAttr(0, Attribute::SExt))
MyFlags.Flags.setSExt();
- if (CS.paramHasAttr(0, Attributes::ZExt))
+ if (CS.paramHasAttr(0, Attribute::ZExt))
MyFlags.Flags.setZExt();
- if (CS.paramHasAttr(0, Attributes::InReg))
+ if (CS.paramHasAttr(0, Attribute::InReg))
MyFlags.Flags.setInReg();
Ins.push_back(MyFlags);
}
@@ -2154,13 +2239,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
- return false;
+ return 0;
// Get opcode and regclass for the given zero.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = X86::FsFLD0SS;
@@ -2181,7 +2266,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
unsigned ResultReg = createResultReg(RC);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 791f5982af7c..0585b43a4640 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,17 +26,17 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -111,7 +111,7 @@ namespace {
EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
- unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
@@ -198,7 +198,7 @@ namespace {
}
/// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() {
+ unsigned getScratchReg() const {
for (int i = NumFPRegs - 1; i >= 8; --i)
if (!isLive(i))
return i;
@@ -206,7 +206,7 @@ namespace {
}
/// isScratchReg - Returns true if RegNo is a scratch FP register.
- bool isScratchReg(unsigned RegNo) {
+ static bool isScratchReg(unsigned RegNo) {
return RegNo > 8 && RegNo < NumFPRegs;
}
@@ -311,7 +311,7 @@ namespace {
void handleSpecialFP(MachineBasicBlock::iterator &I);
// Check if a COPY instruction is using FP registers.
- bool isFPCopy(MachineInstr *MI) {
+ static bool isFPCopy(MachineInstr *MI) {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 369589d469a6..54cbd40274a7 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -17,18 +17,18 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -50,13 +50,13 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
+ MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.callsEHReturn());
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
return X86::SUB64ri32;
@@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::ADD64ri8;
return X86::ADD64ri32;
@@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getLEArOpcode(unsigned is64Bit) {
- return is64Bit ? X86::LEA64r : X86::LEA32r;
+static unsigned getLEArOpcode(unsigned IsLP64) {
+ return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
@@ -145,17 +145,17 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, bool UseLEA,
+ bool Is64Bit, bool IsLP64, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
- Opc = getLEArOpcode(Is64Bit);
+ Opc = getLEArOpcode(IsLP64);
else
Opc = isSub
- ? getSUBriOpcode(Is64Bit, Offset)
- : getADDriOpcode(Is64Bit, Offset);
+ ? getSUBriOpcode(IsLP64, Offset)
+ : getADDriOpcode(IsLP64, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
return CompactUnwindEncoding;
}
+/// usesTheStack - This function checks whether any user of EFLAGS is a
+/// COPY instruction. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
+ re = MRI.reg_end(); ri != re; ++ri)
+ if (ri->isCopy())
+ return true;
+
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -644,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
@@ -673,12 +690,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) &&
+ // stack pointer (we fit in the Red Zone). We also check that we don't
+ // push and pop from the stack.
+ if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
+ !usesTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
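Editorial note on the red-zone guard above: the SysV x86-64 ABI reserves 128 bytes below %rsp that signal and interrupt handlers must not clobber, so a leaf function whose locals fit there can skip the stack-pointer adjustment entirely. A hedged example of a function that still qualifies after the new usesTheStack check:

// A leaf: no calls, no dynamic alloca, no EFLAGS copies; 32 bytes of
// locals fit comfortably in the 128-byte red zone below %rsp.
int redZoneLeaf(int x) {
  int tmp[8];
  for (int i = 0; i < 8; ++i)
    tmp[i] = x + i;
  return tmp[3] + tmp[5];
}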
@@ -692,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@@ -908,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
if (isEAXAlive) {
@@ -920,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
// If we need a base pointer, set it up here. It's whatever the value
@@ -977,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -1062,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(Is64Bit);
+ unsigned Opc = getLEArOpcode(IsLP64);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
} else {
@@ -1072,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1107,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1138,7 +1161,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineInstr *NewMI = prior(MBBI);
- NewMI->copyImplicitOps(MBBI);
+ NewMI->copyImplicitOps(MF, MBBI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
@@ -1150,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ *RegInfo);
}
}
@@ -1362,17 +1386,25 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
-
-/// GetScratchRegister - Get a register for performing work in the segmented
-/// stack prologue. Depending on platform and the properties of the function
-/// either one or two registers will be needed. Set primary to true for
-/// the first register, false for the second.
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
+/// and the properties of the function either one or two registers will be
+/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+ // Erlang stuff.
+ if (CallingConvention == CallingConv::HiPE) {
+ if (Is64Bit)
+ return Primary ? X86::R14 : X86::R13;
+ else
+ return Primary ? X86::EBX : X86::EDI;
+ }
+
if (Is64Bit)
return Primary ? X86::R11 : X86::R12;
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
@@ -1400,7 +1432,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
- const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
@@ -1408,8 +1439,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
- !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
+ !STI.isTargetWin32() && !STI.isTargetFreeBSD())
report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
@@ -1447,13 +1478,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::FS;
TlsOffset = 0x70;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
TlsReg = X86::FS;
TlsOffset = 0x18;
} else {
@@ -1469,16 +1500,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::GS;
TlsOffset = 0x30;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x48 + 90*4;
- } else if (ST->isTargetWin32()) {
+ } else if (STI.isTargetWin32()) {
TlsReg = X86::FS;
TlsOffset = 0x14; // pvArbitrary, reserved for application use
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
} else {
report_fatal_error("Segmented stacks not supported on this platform.");
@@ -1490,10 +1521,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ if (STI.isTargetLinux() || STI.isTargetWin32()) {
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
// TlsOffset doesn't fit into a mod r/m byte so we need an extra register
unsigned ScratchReg2;
@@ -1579,3 +1610,228 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.verify();
#endif
}
+
+/// Erlang programs may need a special prologue to handle the stack size they
+/// might need at runtime. That is because Erlang/OTP does not implement a C
+/// stack but uses a custom hybrid stack/heap architecture.
+/// (for more information see Eric Stenman's Ph.D. thesis:
+/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+///
+/// CheckStack:
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// OldStart:
+/// ...
+/// IncStack:
+/// call inc_stack # doubles the stack space
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const bool Is64Bit = STI.is64Bit();
+ DebugLoc DL;
+ // HiPE-specific values
+ const unsigned HipeLeafWords = 24;
+ const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+ const unsigned Guaranteed = HipeLeafWords * SlotSize;
+ unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
+ MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
+ unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+
+ assert(STI.isTargetLinux() &&
+ "HiPE prologue is only supported on Linux operating systems.");
+
+ // Compute the largest caller's frame that is needed to fit the callees'
+ // frames. This 'MaxStack' is computed from:
+ //
+ // a) the fixed frame size, which is the space needed for all spilled temps,
+ // b) outgoing on-stack parameter areas, and
+ // c) the minimum stack space this function needs to make available for the
+ // functions it calls (a tunable ABI property).
+ if (MFI->hasCalls()) {
+ unsigned MoreStackForCalls = 0;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+ MBBI != MBBE; ++MBBI)
+ for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+ MI != ME; ++MI) {
+ if (!MI->isCall())
+ continue;
+
+ // Get callee operand.
+ const MachineOperand &MO = MI->getOperand(0);
+
+ // Only take account of global function calls (no closures etc.).
+ if (!MO.isGlobal())
+ continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // Do not update 'MaxStack' for primitive and built-in functions: those
+ // whose names start with "erlang." or "bif_", or contain neither a "."
+ // (as a <Module>.<Function>.<Arity> name would) nor an "_" (as the BIF
+ // "suspend_0" does). Such functions are executed on another stack.
+ if (F->getName().find("erlang.") != StringRef::npos ||
+ F->getName().find("bif_") != StringRef::npos ||
+ F->getName().find_first_of("._") == StringRef::npos)
+ continue;
+
+ unsigned CalleeStkArity =
+ F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+ if (HipeLeafWords - 1 > CalleeStkArity)
+ MoreStackForCalls = std::max(MoreStackForCalls,
+ (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+ }
+ MaxStack += MoreStackForCalls;
+ }
+
+ // If the stack frame needed is larger than the guaranteed size, then runtime
+ // checks and calls to the "inc_stack_0" BIF should be inserted in the prologue.
+ if (MaxStack > Guaranteed) {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+ for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+ E = prologueMBB.livein_end(); I != E; I++) {
+ stackCheckMBB->addLiveIn(*I);
+ incStackMBB->addLiveIn(*I);
+ }
+
+ MF.push_front(incStackMBB);
+ MF.push_front(stackCheckMBB);
+
+ unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
+ unsigned LEAop, CMPop, CALLop;
+ if (Is64Bit) {
+ SPReg = X86::RSP;
+ PReg = X86::RBP;
+ LEAop = X86::LEA64r;
+ CMPop = X86::CMP64rm;
+ CALLop = X86::CALL64pcrel32;
+ SPLimitOffset = 0x90;
+ } else {
+ SPReg = X86::ESP;
+ PReg = X86::EBP;
+ LEAop = X86::LEA32r;
+ CMPop = X86::CMP32rm;
+ CALLop = X86::CALLpcrel32;
+ SPLimitOffset = 0x4c;
+ }
+
+ ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "HiPE prologue scratch register is live-in");
+
+ // Create new MBB for StackCheck:
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ // SPLimitOffset is in a fixed heap location (pointed by BP).
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+
+ // Create new MBB for IncStack:
+ BuildMI(incStackMBB, DL, TII.get(CALLop)).
+ addExternalSymbol("inc_stack_0");
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+
+ stackCheckMBB->addSuccessor(&prologueMBB, 99);
+ stackCheckMBB->addSuccessor(incStackMBB, 1);
+ incStackMBB->addSuccessor(&prologueMBB, 99);
+ incStackMBB->addSuccessor(incStackMBB, 1);
+ }
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
+
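A worked instance of the MaxStack arithmetic above (editorial; the frame size and arity are made up):

// Hypothetical x86-64 numbers: SlotSize = 8, CCRegisteredArgs = 6,
// HipeLeafWords = 24, a 40-byte frame, and 8 formal arguments.
bool hipeNeedsStackCheck() {
  const unsigned SlotSize = 8, CCRegisteredArgs = 6, HipeLeafWords = 24;
  const unsigned StackSize = 40, ArgCount = 8;
  unsigned CallerStkArity =
      ArgCount > CCRegisteredArgs ? ArgCount - CCRegisteredArgs : 0;    // 2
  unsigned MaxStack = StackSize + CallerStkArity * SlotSize + SlotSize; // 64
  unsigned Guaranteed = HipeLeafWords * SlotSize;                       // 192
  return MaxStack > Guaranteed; // false: no check/inc_stack blocks emitted
}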
+void X86FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ unsigned StackPtr = RegInfo.getStackRegister();
+ bool reserveCallFrame = hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reserveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackdown instruction into 'add ESP, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(IsLP64, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call, there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
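The alignment rounding used in eliminateCallFramePseudoInstr above, isolated as a sketch with a couple of worked values:

#include <cstdint>

// With StackAlign = 16: 20 -> 32, 32 -> 32, 33 -> 48.
uint64_t roundUpToAlignment(uint64_t Amount, unsigned StackAlign) {
  return (Amount + StackAlign - 1) / StackAlign * StackAlign;
}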
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index dc515dc39c79..3f08b9a2e8d2 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -43,6 +43,8 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF) const;
+ void adjustForHiPEPrologue(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -63,6 +65,10 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 99f557417b7c..6041669f8182 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -19,24 +19,21 @@
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
@@ -283,13 +280,13 @@ namespace {
/// getTargetMachine - Return a reference to the TargetMachine, casted
/// to the target-specific type.
- const X86TargetMachine &getTargetMachine() {
+ const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
- const X86InstrInfo *getInstrInfo() {
+ const X86InstrInfo *getInstrInfo() const {
return getTargetMachine().getInstrInfo();
}
};
@@ -423,6 +420,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (!Chain.getNumOperands())
return false;
+ // Since we are not checking for AA here, conservatively abort if the chain
+ // writes to memory. It's not safe to move the callee (a load) across a store.
+ if (isa<MemSDNode>(Chain.getNode()) &&
+ cast<MemSDNode>(Chain.getNode())->writeMem())
+ return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
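A hypothetical source-level shape of the hazard the new writeMem() guard above rejects (editorial example):

void (*fp)();

void caller() {
  void (*callee)() = fp; // load of the call target
  fp = nullptr;          // intervening store on the same chain
  callee();              // folding the load into the call would read the
                         // post-store value, hence the conservative abort
}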
@@ -434,17 +436,19 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ OptForSize = MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None &&
- (N->getOpcode() == X86ISD::CALL ||
+ // Only do this when the target doesn't favor register indirect
+ // calls.
+ ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
- // Only does this if load can be foled into TC_RETURN.
+ // Only do this if the load can be folded into TC_RETURN.
(Subtarget->is64Bit() ||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
@@ -1040,8 +1044,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal;
return false;
}
- break;
}
+ break;
case ISD::SRL: {
// Scale must not be used already.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b35fb514bf94..69341869aa3e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14,20 +14,15 @@
#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
+#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
-#include "Utils/X86ShuffleDecode.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,14 +30,19 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
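Scalar analogue of the BUILD_VECTOR shortcut added above (sketch only, hypothetical names): extracting a 128-bit half of a wider build is itself a narrower build over the corresponding operands, so no EXTRACT_SUBVECTOR node is needed:

#include <array>

// Taking elements [4, 8) of an 8 x i32 "build" is a 4 x i32 build.
std::array<int, 4> extractHighHalf(const std::array<int, 8> &Ops) {
  return {Ops[4], Ops[5], Ops[6], Ops[7]};
}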
@@ -181,9 +186,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
- // Bypass i32 with i8 on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+ // Bypass expensive divides on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
addBypassSlowDiv(32, 8);
+ if (Subtarget->is64Bit())
+ addBypassSlowDiv(64, 16);
+ }
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
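Sketch of the transform that addBypassSlowDiv(32, 8) requests (assumed semantics): at -O2, guard a 32-bit divide with a cheap 8-bit path, which pays off on in-order Atom cores where the wide divider is slow:

unsigned divWithBypass(unsigned a, unsigned b) {
  if (((a | b) & 0xFFFFFF00u) == 0)             // both fit in 8 bits
    return (unsigned char)a / (unsigned char)b; // fast 8-bit divide
  return a / b;                                 // full 32-bit divide
}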
@@ -368,7 +376,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
@@ -456,7 +470,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
- // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, etc. As a result, no
// other SjLj exception interfaces are implemented and please don't build
@@ -605,10 +619,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
@@ -633,8 +649,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
@@ -644,8 +661,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
@@ -659,10 +677,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
@@ -699,8 +719,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
@@ -725,74 +746,81 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
- VT <= MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ setOperationAction(ISD::ADD , VT, Expand);
+ setOperationAction(ISD::SUB , VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::SHL, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
+ setTruncStoreAction(VT,
(MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -865,6 +893,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
@@ -973,7 +1002,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1016,26 +1053,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasAVX2()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- }
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
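// [Illustrative aside, not part of this patch] A minimal model of the design
// choice in the comment above: rather than marking vector shifts Legal only
// when AVX2 is present, they are always marked Custom, and the custom
// lowering re-checks the subtarget. Names below are hypothetical.
enum class VShiftLowering { NativeInstr, ScalarizedOrWidened };

VShiftLowering lowerVectorShift(bool HasInt256) {
  // The custom hook still recognizes AVX2's legal cases and emits the
  // native instruction directly; everything else gets the slow path.
  return HasInt256 ? VShiftLowering::NativeInstr
                   : VShiftLowering::ScalarizedOrWidened;
}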
- if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
@@ -1053,6 +1085,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
@@ -1062,14 +1098,20 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
@@ -1088,6 +1130,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1102,16 +1146,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
- setOperationAction(ISD::FMA, MVT::v8f32, Custom);
- setOperationAction(ISD::FMA, MVT::v4f64, Custom);
- setOperationAction(ISD::FMA, MVT::v4f32, Custom);
- setOperationAction(ISD::FMA, MVT::v2f64, Custom);
- setOperationAction(ISD::FMA, MVT::f32, Custom);
- setOperationAction(ISD::FMA, MVT::f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
}
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
@@ -1129,13 +1180,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1151,15 +1196,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
+ }
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
- }
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1217,7 +1264,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
@@ -1246,6 +1292,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+      // For MacOSX, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
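// [Illustrative aside, not part of this patch] Why the FSINCOS combine helps:
// a source-level pair of sin/cos calls on the same argument can be folded
// into one runtime call. The function below is a hypothetical example of
// code that benefits.
#include <cmath>

void polarToCartesian(double r, double theta, double &x, double &y) {
  // Before the combine: two libcalls (sin and cos).
  // After it: one sincos() call, or __sincos_stret on Darwin, which
  // returns both results in registers and avoids the memory round-trip.
  x = r * std::cos(theta);
  y = r * std::sin(theta);
}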
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
@@ -1266,6 +1325,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1277,28 +1337,25 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// On Darwin, -Os means optimize for size without hurting performance,
// so do not reduce the limit.
- maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
- maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- benefitFromCodePlacementOpt = true;
// Predictable cmovs don't hurt on Atom because it's in-order.
- predictableSelectIsExpensive = !Subtarget->isAtom();
+ PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
}
-
EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return MVT::i8;
return VT.changeVectorElementTypeToInteger();
}
-
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
@@ -1348,34 +1405,30 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
/// lowering. If DstAlign is zero, that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
- // linux. This is because the stack realignment code can't handle certain
- // cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
- if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16))) &&
- Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->getStackAlignment() >= 32) {
- if (Subtarget->hasAVX2())
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Size >= 32) {
+ if (Subtarget->hasInt256())
return MVT::v8i32;
- if (Subtarget->hasAVX())
+ if (Subtarget->hasFp256())
return MVT::v8f32;
}
if (Subtarget->hasSSE2())
@@ -1384,7 +1437,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
- Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if the source is a string constant. It's
// better to use i32 to avoid the loads.
@@ -1396,6 +1448,21 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
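// [Illustrative aside, not part of this patch] A compact model of the
// type-selection logic above, with the subtarget queries flattened into
// booleans (all names here are hypothetical):
#include <cstdint>

enum class MemOpTy { V8i32, V8f32, V4i32, V4f32, I32 };

MemOpTy pickMemOpType(uint64_t Size, bool FastUnaligned, bool Aligned16,
                      bool HasInt256, bool HasFp256, bool HasSSE2,
                      bool HasSSE1) {
  if (Size >= 16 && (FastUnaligned || Aligned16)) {
    if (Size >= 32) {                 // wide enough for a 256-bit op
      if (HasInt256) return MemOpTy::V8i32;
      if (HasFp256)  return MemOpTy::V8f32;
    }
    if (HasSSE2) return MemOpTy::V4i32;
    if (HasSSE1) return MemOpTy::V4f32;
  }
  return MemOpTy::I32;                // scalar fallback (f64 case elided)
}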
}
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
+ if (VT == MVT::f32)
+ return X86ScalarSSEf32;
+ else if (VT == MVT::f64)
+ return X86ScalarSSEf64;
+ return true;
+}
+
+bool
+X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ if (Fast)
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ return true;
+}
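// [Illustrative aside, not part of this patch] The new hook's contract in
// miniature: the return value says whether an unaligned access is legal at
// all; the optional out-parameter reports whether it is fast. On x86 both
// answers come from one subtarget query. Hypothetical stand-in type:
struct UnalignedPolicy {
  bool SubtargetSaysFast; // models Subtarget->isUnalignedMemAccessFast()

  bool allowsUnalignedMemoryAccesses(bool *Fast) const {
    if (Fast)
      *Fast = SubtargetSaysFast; // speed reported through the out-param
    return true;                 // x86 can always execute them
  }
};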
+
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
@@ -1449,10 +1516,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
// FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(EVT VT) const{
+X86TargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
@@ -1494,7 +1561,6 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
-
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1526,14 +1592,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
@@ -1602,14 +1661,16 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax.
- if (Subtarget->is64Bit() &&
- DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument in %eax as well.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
@@ -1617,11 +1678,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ unsigned RetValReg
+ = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+ X86::RAX : X86::EAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
- // RAX now acts like a return value.
- MRI.addLiveOut(X86::RAX);
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
}
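// [Illustrative aside, not part of this patch] The register choice above in
// isolation: the sret pointer comes back in RAX on LP64, but in EAX on
// ILP32 x86-64 (x32) and on Win32, where pointers are 32 bits wide.
// Hypothetical model:
enum class SRetReg { RAX, EAX };

SRetReg sretReturnReg(bool Is64Bit, bool IsILP32) {
  return (Is64Bit && !IsILP32) ? SRetReg::RAX : SRetReg::EAX;
}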
RetOps[0] = Chain; // Update chain.
@@ -1666,8 +1730,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-EVT
-X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+MVT
+X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
@@ -1676,7 +1740,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
else
ReturnMVT = MVT::i32;
- EVT MinVT = getRegisterType(Context, ReturnMVT);
+ MVT MinVT = getRegisterType(ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -1698,7 +1762,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
@@ -1742,7 +1806,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
-
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
@@ -1806,7 +1869,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -1893,7 +1957,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -1955,10 +2019,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
- if (RegVT.isVector()) {
- ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
- ArgValue);
- } else
+ if (RegVT.isVector())
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
+ else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
@@ -1974,14 +2037,18 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
- if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument in %eax as well.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
+ if (MF.getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -2034,8 +2101,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2238,7 +2305,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2513,8 +2580,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->getFnAttributes().
- hasAttribute(Attributes::NonLazyBind)) {
+ cast<Function>(GV)->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2594,8 +2662,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns, not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl,
- NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -2632,7 +2699,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ins, dl, DAG, InVals);
}
-
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
@@ -2754,7 +2820,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
@@ -2793,7 +2859,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -2913,9 +2979,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments, so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
@@ -2924,7 +2996,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
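// [Illustrative aside, not part of this patch] The register-budget check
// above, modeled standalone: EAX, ECX and EDX can carry 'inreg' arguments,
// but under PIC one of them must materialize the callee address, so the
// sibcall is rejected one register earlier. Hypothetical helper:
#include <vector>

enum class GPR { EAX, ECX, EDX, Other };

bool fitsSibcallRegBudget(const std::vector<GPR> &ArgRegs, bool IsPIC) {
  const unsigned MaxInRegs = IsPIC ? 2 : 3;
  unsigned NumInRegs = 0;
  for (GPR R : ArgRegs)
    if (R == GPR::EAX || R == GPR::ECX || R == GPR::EDX)
      if (++NumInRegs == MaxInRegs)
        return false; // would clobber a register the caller still needs
  return true;
}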
@@ -2941,7 +3013,6 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return X86::createFastISel(funcInfo, libInfo);
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
@@ -2961,7 +3032,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -3011,7 +3082,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3052,7 +3123,6 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
-
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
@@ -3103,6 +3173,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
}
}
@@ -3233,9 +3305,7 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
- if (Val < 0 || Val == CmpVal)
- return true;
- return false;
+ return (Val < 0 || Val == CmpVal);
}
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
@@ -3262,8 +3332,8 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
@@ -3291,8 +3361,8 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
@@ -3322,8 +3392,8 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
const X86Subtarget *Subtarget) {
- if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
- (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3410,9 +3480,9 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
bool Commuted = false) {
- if (!HasAVX && VT.getSizeInBits() == 256)
+ if (!HasFp256 && VT.is256BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3547,7 +3617,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
DebugLoc dl = SVOp->getDebugLoc();
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3591,14 +3661,14 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3630,14 +3700,14 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3667,22 +3737,22 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ bool Is256BitVec = VT.is256BitVector();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
// FIXME: Need a better way to get rid of this; there's no latency difference
// between UNPCKLPD and MOVDDUP, so the latter should always be checked first
// and the former later. We should also remove the "_undef" special mask.
- if (NumElts == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && Is256BitVec)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3710,14 +3780,14 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3766,8 +3836,8 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3798,7 +3868,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -3826,13 +3896,13 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX)
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256)
return false;
unsigned NumElts = VT.getVectorNumElements();
// Only match 256-bit with 32/64-bit types
- if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
+ if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
return false;
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3888,8 +3958,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i+1" is the value the indexed mask element must have
@@ -3911,8 +3981,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i" is the value the indexed mask element must have
@@ -3927,8 +3997,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to the 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3972,9 +4042,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -3991,9 +4060,8 @@ bool X86::isVINSERTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4003,7 +4071,7 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for PSHUF/SHUFP");
@@ -4033,7 +4101,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4057,7 +4125,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4081,7 +4149,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
@@ -4112,8 +4180,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- EVT VecVT = N->getOperand(0).getValueType();
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4129,8 +4197,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- EVT VecVT = N->getValueType(0);
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4140,7 +4208,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
/// Handles 256-bit.
static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
@@ -4160,17 +4228,18 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->isNullValue()) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
}
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4319,12 +4388,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (Size == 128) { // SSE
+ if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -4332,8 +4400,8 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
- } else if (Size == 256) { // AVX
- if (Subtarget->hasAVX2()) { // AVX2
+ } else if (VT.is256BitVector()) { // AVX
+ if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
@@ -4354,22 +4422,21 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (Size == 256) {
- if (HasAVX2) { // AVX2
+ if (VT.is256BitVector()) {
+ if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
- } else if (Size == 128) {
+ } else if (VT.is128BitVector()) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else
llvm_unreachable("Unexpected vector type");
@@ -4448,14 +4515,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
EVT VT = V.getValueType();
DebugLoc dl = V.getDebugLoc();
- unsigned Size = VT.getSizeInBits();
- if (Size == 128) {
+ if (VT.is128BitVector()) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
- } else if (Size == 256) {
+ } else if (VT.is256BitVector()) {
// To use VPERMILPS to splat scalars, the second half of indices must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
@@ -4479,14 +4545,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
int EltNo = SV->getSplatIndex();
int NumElems = SrcVT.getVectorNumElements();
- unsigned Size = SrcVT.getSizeInBits();
+ bool Is256BitVec = SrcVT.is256BitVector();
- assert(((Size == 128 && NumElems > 4) || Size == 256) &&
- "Unknown how to promote splat for type");
+ assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
+         "Don't know how to promote splat for this type");
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = Extract128BitVector(V1, EltNo, DAG, dl);
if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
@@ -4503,7 +4569,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Recreate the 256-bit vector and place the same 128-bit vector
// into the low and high parts. This is necessary because we want
// to use VPERM* to shuffle the vectors.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
@@ -4555,6 +4621,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -4598,7 +4668,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
@@ -4893,7 +4962,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits,
- TLI.getShiftAmountTy(SrcOp.getValueType()))));
+ TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
SDValue
@@ -5063,10 +5132,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// or SDValue() otherwise.
SDValue
X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
@@ -5109,7 +5178,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
- if (!Subtarget->hasAVX2())
+ if (!Subtarget->hasInt256())
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
@@ -5136,7 +5205,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// Handle broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
- if (ConstSplatVal && Subtarget->hasAVX2()) {
+ if (ConstSplatVal && Subtarget->hasInt256()) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
@@ -5164,7 +5233,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
- if (!IsLoad && Subtarget->hasAVX2() &&
+ if (!IsLoad && Subtarget->hasInt256() &&
(ScalarSize == 32 || (Is256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
@@ -5177,7 +5246,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// The integer check is needed for the 64-bit into 128-bit case, so it
// doesn't match double, since there is no vbroadcastsd xmm
- if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+ if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
@@ -5264,8 +5333,8 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5281,11 +5350,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
- if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
+ if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
@@ -5596,7 +5665,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
+ MVT ResVT = Op.getValueType().getSimpleVT();
assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
@@ -5623,63 +5692,51 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasSSE41())
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
- unsigned ISDNo = 0;
- MVT OpTy;
-
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v8i16:
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v8i16;
- break;
- case MVT::v4i32:
- case MVT::v4f32:
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v4f32;
- break;
- case MVT::v2i64:
- case MVT::v2f64:
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v2f64;
- break;
- case MVT::v8i32:
- case MVT::v8f32:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v8f32;
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v4f64;
- break;
- }
- assert(ISDNo && "Invalid Op Number");
+ // Check the mask for BLEND and build the value.
+ unsigned MaskValue = 0;
+  // There are 2 lanes if NumElems > 8, and 1 lane otherwise.
+ unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
- unsigned MaskVals = 0;
+  // Blend for v16i16 should be symmetric for both lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
- for (unsigned i = 0; i != NumElems; ++i) {
+ int SndLaneEltIdx = (NumLanes == 2) ?
+ SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if (EltIdx == (int)i || EltIdx < 0)
- MaskVals |= (1<<i);
- else if (EltIdx == (int)(i + NumElems))
- continue; // Bit is set to zero;
+
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ continue;
+
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
+ (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+ MaskValue |= (1<<i);
else
return SDValue();
}
- V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
- SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
- DAG.getConstant(MaskVals, MVT::i32));
+  // Convert i32 vectors to floating point when AVX2 is not available.
+  // AVX2 introduced the VPBLENDD instruction for 128- and 256-bit vectors.
+ MVT BlendVT = VT;
+ if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
+    V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
+    V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
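// [Illustrative aside, not part of this patch] The immediate construction
// above, modeled standalone: bit i of the blend immediate selects V2 for
// element i, and for two-lane types both lanes must agree. Hypothetical
// helper; returns -1 when the mask is not a blend.
#include <vector>

int buildBlendImm(const std::vector<int> &Mask) {
  unsigned NumElems = Mask.size();
  unsigned NumLanes = (NumElems - 1) / 8 + 1;    // 2 lanes iff > 8 elements
  unsigned NumElemsInLane = NumElems / NumLanes;
  unsigned MaskValue = 0;
  for (unsigned i = 0; i != NumElemsInLane; ++i) {
    int Lo = Mask[i];
    int Hi = (NumLanes == 2) ? Mask[i + NumElemsInLane] : -1;
    // Element taken from V1 (or undef) in both lanes: leave bit i clear.
    if ((Lo < 0 || Lo == (int)i) &&
        (Hi < 0 || Hi == (int)(i + NumElemsInLane)))
      continue;
    // Element taken from V2 in both lanes: set bit i.
    if (Lo == (int)(i + NumElems) &&
        (Hi < 0 || Hi == (int)(i + NumElems + NumElemsInLane)))
      MaskValue |= 1u << i;
    else
      return -1; // lanes disagree, or element is not a per-slot select
  }
  return (int)MaskValue;
}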
@@ -5814,6 +5871,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
}
}
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
@@ -5829,7 +5891,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
int EltIdx = MaskVals[i] * 2;
int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
- pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
@@ -5947,6 +6009,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -6065,7 +6132,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -6079,7 +6146,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
// (1) one of the input vectors is undefined or zeroinitializer.
// The mask value 0x80 puts 0 in the corresponding slot of the vector.
// And (2) the mask indexes don't cross the 128-bit lane.
- if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+ if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
(!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
return SDValue();
@@ -6112,8 +6179,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG) {
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
unsigned Scale;
@@ -6149,7 +6217,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(EVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6191,14 +6259,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Output[2];
SmallVector<int, 16> Mask;
@@ -6303,7 +6371,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6452,23 +6520,6 @@ static bool MayFoldVectorLoad(SDValue V) {
return MayFoldLoad(V);
}
-// FIXME: the version above should always be used. Since there's
-// a bug where several vector shuffles can't be folded because the
-// DAG is not updated during lowering and a node claims to have two
-// uses while it only has one, use this version, and let isel match
-// another instruction if the load really happens to have more than
-// one use. Remove this version after this bug get fixed.
-// rdar://8434668, PR8156
-static bool RelaxedMayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- V = V.getOperand(0);
- if (ISD::isNormalLoad(V.getNode()))
- return true;
- return false;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6574,7 +6625,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// Reduce a vector shuffle to zext.
SDValue
-X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
@@ -6582,7 +6633,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Only AVX2 supports 256-bit vector integer extension.
- if (!Subtarget->hasAVX2() && VT.is256BitVector())
+ if (!Subtarget->hasInt256() && VT.is256BitVector())
return SDValue();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6618,9 +6669,10 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
@@ -6639,8 +6691,21 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
  // If it's foldable, i.e. a normal load with a single use, we will let code
  // selection fold it. Otherwise, we will shorten the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+      // In this case we should extract a subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
return DAG.getNode(ISD::BITCAST, DL, VT,
@@ -6650,7 +6715,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -6660,25 +6725,14 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
- unsigned NumElem = VT.getVectorNumElements();
- int Size = VT.getSizeInBits();
-
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
-
- // Handle splats by matching through known shuffle masks
- if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem < 8))
- return SDValue();
-
- // All remaning splats are promoted to target supported vector shuffles.
- return PromoteSplat(SVOp, DAG);
}
// Check integer expanding shuffles.
- SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, DAG);
if (NewOp.getNode())
return NewOp;
@@ -6686,7 +6740,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
@@ -6694,18 +6748,18 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -6720,7 +6774,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -6728,11 +6782,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasSSE2 = Subtarget->hasSSE2();
- bool HasAVX = Subtarget->hasAVX();
- bool HasAVX2 = Subtarget->hasAVX2();
+ bool HasFp256 = Subtarget->hasFp256();
+ bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -6766,20 +6820,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
- V2IsUndef && RelaxedMayFoldVectorLoad(V1))
+ V2IsUndef && MayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
  // Used to match splats
- if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
@@ -6792,12 +6846,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
- if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
-
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+ if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ DAG);
+
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
@@ -6810,7 +6865,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6828,7 +6883,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
if (isMOVHLPSMask(M, VT))
@@ -6849,7 +6904,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift) {
// No better options. Use a vshldq / vsrldq.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6878,10 +6933,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
@@ -6890,9 +6945,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    // new vector_shuffle with the corrected mask.
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
- if (isUNPCKLMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKLMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKHMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
@@ -6904,15 +6959,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
std::swap(V1IsSplat, V2IsSplat);
Commuted = false;
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -6920,7 +6975,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
@@ -6930,23 +6985,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
- if (isPSHUFHWMask(M, VT, HasAVX2))
+ if (isPSHUFHWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
- if (isPSHUFLWMask(M, VT, HasAVX2))
+ if (isPSHUFLWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasAVX))
+ if (isSHUFPMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
- if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
@@ -6955,12 +7010,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasAVX)) {
- if (HasAVX2 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT, HasFp256)) {
+ if (HasInt256 && VT == MVT::v8i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -6968,7 +7023,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle VPERM2F128/VPERM2I128 permutations
- if (isVPERM2X128Mask(M, VT, HasAVX))
+ if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
@@ -6976,7 +7031,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+ if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
SmallVector<SDValue, 8> permclMask;
for (unsigned i = 0; i != 8; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
@@ -6988,11 +7043,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
}
- if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+ if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
getShuffleCLImmediate(SVOp), DAG);
-
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -7030,13 +7084,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
- if (!Op.getOperand(0).getValueType().is128BitVector())
+ if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7094,7 +7146,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
return SDValue();
}
-
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
@@ -7102,7 +7153,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
+ MVT VecVT = Vec.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
@@ -7129,7 +7180,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -7142,7 +7193,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT::v4i32, Vec),
Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
- EVT EltVT = MVT::i32;
+ MVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
@@ -7157,7 +7208,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7176,7 +7227,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    // Note that if the lower 64 bits of the result of the UNPCKHPD are then
    // stored to an f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7186,11 +7237,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7243,8 +7292,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7292,7 +7341,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT OpVT = Op.getValueType();
+ MVT OpVT = Op.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
@@ -7323,7 +7372,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
@@ -7343,7 +7392,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue SubVec = Op.getNode()->getOperand(1);
@@ -7459,7 +7508,6 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
@@ -7508,8 +7556,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
- int64_t Offset,
- SelectionDAG &DAG) const {
+ int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
@@ -7729,7 +7776,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -7779,7 +7826,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
    // Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7799,18 +7846,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+  // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so we use
+  // its literal value of 0x2C directly.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
@@ -7846,7 +7894,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for this target.");
}
-
/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
@@ -8013,9 +8060,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
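
A short scalar model of the bias-constant trick used above (an illustration of why 2^52 = 0x4330000000000000 and 2^84 = 0x4530000000000000 go into the constant pool; it assumes IEEE-754 doubles and is not the DAG code itself):

#include <cmath>
#include <cstdint>
#include <cstring>

double U64ToDoubleModel(uint64_t X) {
  // OR the halves of X into the mantissas of 2^52 and 2^84.
  uint64_t LoBits = 0x4330000000000000ULL | (X & 0xFFFFFFFFULL);
  uint64_t HiBits = 0x4530000000000000ULL | (X >> 32);
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof Lo);   // == 2^52 + lo32
  std::memcpy(&Hi, &HiBits, sizeof Hi);   // == 2^84 + hi32 * 2^32
  // Subtracting the biases leaves hi32*2^32 + lo32 == X (up to one rounding).
  return (Hi - std::ldexp(1.0, 84)) + (Lo - std::ldexp(1.0, 52));
}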
@@ -8109,7 +8158,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
@@ -8202,8 +8252,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
-std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
+std::pair<SDValue,SDValue>
+X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -8295,46 +8346,197 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
}
}
-SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+
+ if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
+
+ SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ SDValue Undef = DAG.getUNDEF(InVT);
+ bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+ SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+ SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+
+ MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
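
A minimal scalar model of the unpack-based widening above, shown for the v8i16 -> v8i32 zero-extend case without AVX2 (an illustration assuming little-endian lane layout, not the DAG code): interleaving with a zero vector and bitcasting the halves yields the zero-extended elements.

#include <array>
#include <cstdint>

std::array<uint32_t, 8> ZextV8i16Model(const std::array<uint16_t, 8> &In) {
  uint16_t Inter[16];                 // vpunpcklwd / vpunpckhwd with zeros
  for (int I = 0; I != 8; ++I) {
    Inter[2 * I] = In[I];             // source word: low half of the lane
    Inter[2 * I + 1] = 0;             // zero word: high half of the lane
  }
  std::array<uint32_t, 8> Out;        // bitcast both halves, then concat
  for (int I = 0; I != 8; ++I)
    Out[I] = uint32_t(Inter[2 * I]) | (uint32_t(Inter[2 * I + 1]) << 16);
  return Out;
}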
+
+SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
+ return SDValue();
+}
+SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
+
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
if (!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+ assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
// AVX2 has better support of integer extending.
- if (Subtarget->hasAVX2())
+ if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+ DAG.getVectorShuffle(MVT::v8i16, DL, In,
+ DAG.getUNDEF(MVT::v8i16),
+ &Mask[0]));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
-SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SVT = Op.getOperand(0).getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getValueType().getSimpleVT();
- if (!VT.is128BitVector() || !SVT.is256BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
+ if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ // On AVX2, v4i64 -> v4i32 becomes VPERMD.
+ if (Subtarget->hasInt256()) {
+ static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
+ ShufMask);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
+ DAG.getIntPtrConstant(0));
+ }
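
A scalar model of the AVX2 branch above (an illustration, not DAG code): viewing v4i64 as v8i32, the VPERMD mask {0, 2, 4, 6, ...} gathers the low dword of every 64-bit element into the bottom half, which EXTRACT_SUBVECTOR then returns.

#include <array>
#include <cstdint>

std::array<uint32_t, 4> TruncV4i64Model(const std::array<uint64_t, 4> &In) {
  uint32_t As32[8];                          // bitcast v4i64 -> v8i32
  for (int I = 0; I != 4; ++I) {
    As32[2 * I] = uint32_t(In[I]);           // low dwords: indices 0, 2, 4, 6
    As32[2 * I + 1] = uint32_t(In[I] >> 32);
  }
  return { As32[0], As32[2], As32[4], As32[6] }; // vpermd + extract_subvector
}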
+
+ // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The PSHUFD mask:
+ static const int ShufMask1[] = {0, 2, 0, 0};
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
+
+ // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
+ }
+
+ if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+    // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
+ if (Subtarget->hasInt256()) {
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i < 2; ++i) {
+ pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ for (unsigned j = 0; j < 8; ++j)
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
+ &pshufbMask[0], 32);
+ In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+
+ static const int ShufMask[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
+ &ShufMask[0]);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ }
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+
+ // The PSHUFB mask:
+ static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+    // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ }
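
A scalar model of the PSHUFB step used in both branches above (an illustration of the instruction's per-lane semantics, not DAG code): each mask byte selects a byte within its 16-byte lane, and 0x80 writes zero, so the mask {0,1,4,5,8,9,0xc,0xd, 0x80 x 8} keeps the low 16 bits of each 32-bit element and clears the rest.

#include <array>
#include <cstdint>

std::array<uint8_t, 16> PshufbLaneModel(const std::array<uint8_t, 16> &Src,
                                        const std::array<uint8_t, 16> &Mask) {
  std::array<uint8_t, 16> Dst;
  for (int I = 0; I != 16; ++I)
    Dst[I] = (Mask[I] & 0x80) ? 0 : Src[Mask[I] & 0x0F];
  return Dst;
}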
+
+ // Handle truncation of V256 to V128 using shuffles.
+ if (!VT.is128BitVector() || !SVT.is256BitVector())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+  assert(VT.getVectorNumElements() == SVT.getVectorNumElements() &&
+         "Invalid op");
+ assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumElems * 2);
- SDValue In = Op.getOperand(0);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
@@ -8348,9 +8550,10 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v8i16)
- return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ MVT VT = Op.getValueType().getSimpleVT();
+ if (VT.isVector()) {
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT,
DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
MVT::v8i32, Op.getOperand(0)));
return SDValue();
@@ -8389,12 +8592,11 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
-SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -8406,8 +8608,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8415,9 +8617,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8438,8 +8642,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8447,9 +8651,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
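
A scalar model of the splatted constant-pool masks built by LowerFABS and LowerFNEG above (an illustration assuming IEEE-754 bit layout, not DAG code): FABS clears the sign bit by ANDing with ~(1 << 63), FNEG flips it by XORing with (1 << 63), and LowerFCOPYSIGN below combines both masks.

#include <cstdint>
#include <cstring>

double FabsModel(double X) {
  uint64_t B; std::memcpy(&B, &X, 8);
  B &= ~(1ULL << 63);                 // AND with the splatted mask
  std::memcpy(&X, &B, 8); return X;
}
double FnegModel(double X) {
  uint64_t B; std::memcpy(&B, &X, 8);
  B ^= 1ULL << 63;                    // XOR with the splatted mask
  std::memcpy(&X, &B, 8); return X;
}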
@@ -8473,8 +8679,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SrcVT = Op1.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT SrcVT = Op1.getValueType().getSimpleVT();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -8493,13 +8699,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// First get the sign bit of second operand.
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8522,13 +8730,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8544,7 +8756,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -8554,7 +8766,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -8899,6 +9112,11 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
@@ -8947,6 +9165,14 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
+ // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
+ // the condition code later.
+ bool Invert = false;
+ if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
+ Invert = true;
+ LHS = LHS.getOperand(0);
+ }
+
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
@@ -8962,7 +9188,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+      // Flip the condition if the LHS was a 'not' instruction.
+ if (Invert)
+ Cond = X86::GetOppositeBranchCondition(Cond);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
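
A scalar check of why the (x ^ -1) fold above is sound (an illustration, not DAG code): testing bit N of ~x is testing bit N of x with the opposite outcome, so BT can run on x and the resulting SETCC condition is simply flipped.

#include <cstdint>

bool BitOfNotIsZero(uint32_t X, unsigned N) {
  return ((~X) & (1U << N)) == 0;   // the pattern before the fold
}
bool BtFlipped(uint32_t X, unsigned N) {
  bool CF = (X >> N) & 1;           // BT(x, N) sets CF to the tested bit
  return CF;                        // COND_AE flipped to COND_B
}
// BitOfNotIsZero(X, N) == BtFlipped(X, N) for every X and every N < 32.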
@@ -8970,65 +9199,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue();
}
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
-
- assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode())
- return NewSetCC;
- }
-
- // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
- // these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
- // If the input is a setcc, then reuse the input setcc or use a new one with
- // the inverted condition.
- if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (!Invert) return Op0;
-
- CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
- }
- }
-
- bool isFP = Op1.getValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
- return SDValue();
-
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
-}
-
// Lower256IntVSETCC - Break a 256-bit integer VSETCC into two new 128-bit
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9048,27 +9222,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-
-SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
#ifndef NDEBUG
- EVT EltVT = Op0.getValueType().getVectorElementType();
+ MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
@@ -9133,7 +9307,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
@@ -9163,8 +9337,28 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
- return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+      // If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
+ // pcmpeqd + pshufd + pand.
+ assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+      // First cast everything to the right type.
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Do the compare.
+ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+ // Make sure the lower and upper halves are both all-ones.
+ const int Mask[] = { 1, 0, 3, 2 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
+ Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
}
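
A scalar model of the pcmpeqq synthesis above, per 64-bit element (an illustration, not DAG code): pcmpeqd compares the two 32-bit halves, the pshufd mask {1, 0, 3, 2} swaps them within each element, and pand leaves all-ones only when both halves matched.

#include <cstdint>

uint64_t PcmpeqqModel(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) == uint32_t(B) ? 0xFFFFFFFFU : 0;
  uint32_t Hi = uint32_t(A >> 32) == uint32_t(B >> 32) ? 0xFFFFFFFFU : 0;
  uint64_t Cmp  = (uint64_t(Hi) << 32) | Lo;  // pcmpeqd
  uint64_t Swap = (uint64_t(Lo) << 32) | Hi;  // pshufd {1, 0, 3, 2}
  return Cmp & Swap;                          // pand
}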
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -9189,6 +9383,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -9220,11 +9471,6 @@ static bool isZero(SDValue V) {
return C && C->isNullValue();
}
-static bool isAllOnes(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isAllOnesValue();
-}
-
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
@@ -9316,7 +9562,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -9425,6 +9671,53 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
+ (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
+
+  // Optimize vectors in AVX mode:
+  // sign extend v8i16 to v8i32 and v4i32 to v4i64.
+  //
+  // Divide the input vector into two parts;
+  // for v4i32 the shuffle masks will be {0, 1, -1, -1} and {2, 3, -1, -1}.
+  // Use the vpmovsx instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32,
+  // then concat the vectors back to the original VT.
+
+ unsigned NumElems = InVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(InVT);
+
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
+
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
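
A minimal scalar model of the split used above for v4i32 -> v4i64 without AVX2 (an illustration, not DAG code): the two shuffles isolate the halves, each half is sign extended (the X86ISD::VSEXT_MOVL / vpmovsxdq step), and the results are concatenated back to the original width.

#include <array>
#include <cstdint>

std::array<int64_t, 4> SextV4i32Model(const std::array<int32_t, 4> &In) {
  std::array<int64_t, 2> Lo = { In[0], In[1] }; // shuffle {0, 1, -1, -1}
  std::array<int64_t, 2> Hi = { In[2], In[3] }; // shuffle {2, 3, -1, -1}
  return { Lo[0], Lo[1], Hi[0], Hi[1] };        // concat_vectors
}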
+
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -9713,7 +10006,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
Chain, Dest, CC, Cond);
}
-
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
@@ -9876,8 +10168,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->getFnAttributes()
- .hasAttribute(Attributes::NoImplicitFloat)) &&
+ .getFunction()->getAttributes()
+ .hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -9925,7 +10218,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
@@ -10082,6 +10375,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ // SSE2/AVX2 sub with unsigned saturation intrinsics
+ case Intrinsic::x86_sse2_psubus_b:
+ case Intrinsic::x86_sse2_psubus_w:
+ case Intrinsic::x86_avx2_psubus_b:
+ case Intrinsic::x86_avx2_psubus_w:
+ return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
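A usage-level sketch of the saturating subtraction now routed through X86ISD::SUBUS (an illustration assuming SSE2 headers are available; the scalar helper shows the per-element semantics the intrinsic implements):

#include <emmintrin.h>
#include <cstdint>

uint8_t SubusScalar(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

__m128i SubusV16i8(__m128i A, __m128i B) {
  return _mm_subs_epu8(A, B);   // lowers via Intrinsic::x86_sse2_psubus_b
}
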
// SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
@@ -10131,6 +10432,100 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+ // SSE2/SSE41/AVX2 integer max/min intrinsics.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ Opcode = X86ISD::UMAX;
+ break;
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ Opcode = X86ISD::UMIN;
+ break;
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ Opcode = X86ISD::SMAX;
+ break;
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d:
+ Opcode = X86ISD::SMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/SSE2/AVX floating point max/min intrinsics.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ Opcode = X86ISD::FMAX;
+ break;
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256:
+ Opcode = X86ISD::FMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
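
A usage-level sketch of the mapping above (an illustration assuming SSE2 headers): the intrinsic below now reaches instruction selection as the generic X86ISD::UMAX node rather than a dedicated per-intrinsic pattern.

#include <emmintrin.h>

__m128i MaxU8(__m128i A, __m128i B) {
  return _mm_max_epu8(A, B);    // Intrinsic::x86_sse2_pmaxu_b -> X86ISD::UMAX
}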
+
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
@@ -10198,6 +10593,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
+ case Intrinsic::x86_sse_sqrt_ps:
+ case Intrinsic::x86_sse2_sqrt_pd:
+ case Intrinsic::x86_avx_sqrt_ps_256:
+ case Intrinsic::x86_avx_sqrt_pd_256:
+ return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -10513,16 +10914,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- // RDRAND intrinsics.
+ // RDRAND/RDSEED intrinsics.
case Intrinsic::x86_rdrand_16:
case Intrinsic::x86_rdrand_32:
- case Intrinsic::x86_rdrand_64: {
+ case Intrinsic::x86_rdrand_64:
+ case Intrinsic::x86_rdseed_16:
+ case Intrinsic::x86_rdseed_32:
+ case Intrinsic::x86_rdseed_64: {
+ unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
+ IntNo == Intrinsic::x86_rdseed_32 ||
+ IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
+ X86ISD::RDRAND;
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
- // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise
- // return the value from Rand, which is always 0, casted to i32.
+ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
+    // Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, MVT::i32),
@@ -10535,6 +10943,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
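
A user-level sketch of the CF handling above (an illustration assuming a compiler exposing <immintrin.h>): the lowered node pair yields the random value plus a validity flag derived from CF, which is exactly what the step intrinsics report.

#include <immintrin.h>
#include <utility>

std::pair<unsigned, int> Rdrand32Model() {
  unsigned V = 0;
  int OK = _rdrand32_step(&V);  // CF=1: OK == 1 and V holds the value
  return { V, OK };             // CF=0: OK == 0 and V is 0
}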
+
+ // XTEST intrinsics.
+ case Intrinsic::x86_xtest: {
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, MVT::i8),
+ InTrans);
+ SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+ Ret, SDValue(InTrans.getNode(), 1));
+ }
}
}
@@ -10710,7 +11130,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
- const AttrListPtr &Attrs = Func->getAttributes();
+ const AttributeSet &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
@@ -10718,7 +11138,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
+ if (Attrs.hasAttribute(Idx, Attribute::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
@@ -10808,7 +11228,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, 2, 2);
@@ -10841,7 +11260,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
-
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
@@ -10970,17 +11388,43 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
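+  // PMULUDQ only multiplies the even (0 and 2) 32-bit lanes into 64-bit
+  // products, so the odd lanes are shuffled down to even positions and
+  // multiplied separately; the low halves of all four products are then
+  // interleaved back into lane order.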
+ if (VT == MVT::v4i32) {
+    assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+           "Should not custom lower when pmulld is available!");
+
+ // Extract the odd parts.
+ const int UnpackMask[] = { 1, -1, 3, -1 };
+ SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
+ SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
+
+ // Multiply the even parts.
+ SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
+ // Now multiply odd parts.
+ SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
+
+ Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
+ Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+
+ // Merge the two vectors back together with a shuffle. This expands into 2
+ // shuffles.
+ const int ShufMask[] = { 0, 4, 2, 6 };
+ return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
+ }
+
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
"Only know how to lower V2I64/V4I64 multiply");
- DebugLoc dl = Op.getDebugLoc();
-
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
@@ -10992,9 +11436,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
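+  //
+  // This is schoolbook multiplication on 32-bit halves: with a = 2^32*ahi + alo
+  // and b = 2^32*bhi + blo, a*b mod 2^64 = alo*blo + 2^32*(alo*bhi + ahi*blo);
+  // the 2^64*ahi*bhi term wraps away entirely and can be dropped.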
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
-
SDValue ShAmt = DAG.getConstant(32, MVT::i32);
SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
@@ -11018,16 +11459,55 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
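+  // For a splat power-of-two divisor this is the usual shift sequence: add
+  // 2^lg2 - 1 to negative inputs so the arithmetic shift rounds toward zero
+  // (e.g. -7 sdiv 4: (-7 + 3) >> 2 = -1 rather than -7 >> 2 = -2), negating
+  // the result afterwards if the divisor was negative.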
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+    // Add (N0 < 0) ? 2^lg2 - 1 : 0;
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
+
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
-
- if (!Subtarget->hasSSE2())
- return SDValue();
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
@@ -11036,7 +11516,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- (Subtarget->hasAVX2() &&
+ (Subtarget->hasInt256() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
if (Op.getOpcode() == ISD::SHL)
return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
@@ -11093,7 +11573,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unknown shift opcode.");
}
- if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
@@ -11139,19 +11619,229 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
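+    // E.g. a v2i64 shift amount seen through a v4i32 BUILD_VECTOR has
+    // Ratio == 2, and each i32 element contributes 32 == 1 << (6 - 1) bits
+    // of the reassembled 64-bit shift amount.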
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
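+      // A BUILD_VECTOR amount is a splat if every non-undef element equals
+      // the first non-undef one; that element then serves as the scalar
+      // shift amount.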
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
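+    // Build 2^Amt per lane with IEEE-754 bit arithmetic: shift Amt into the
+    // exponent field (bit 23) and add the bias pattern of 1.0f (0x3f800000)
+    // to get the float 2^Amt, which FP_TO_SINT turns back into an integer
+    // power of two; the variable shift then reduces to a multiply.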
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
-
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
@@ -11160,8 +11850,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
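+    // A v16i8 shift amount is at most 7, i.e. three significant bits;
+    // shifting it left by 5 parks the amount's high bit in each byte's sign
+    // bit, where the VSELECT-based blend below can test it round by round.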
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
@@ -11336,9 +12025,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
- if (!Subtarget->hasAVX2()) {
+ if (!Subtarget->hasInt256()) {
// needs to be split
unsigned NumElems = VT.getVectorNumElements();
@@ -11364,14 +12053,28 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
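+      // Sign-extend-in-reg of a zero-extended value re-extends the original
+      // narrow lanes from their sign bit, which is exactly VSEXT of the
+      // source.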
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+    // If this VECTOR_SHUFFLE is really a VZEXT, LowerVectorIntExtend will
+    // recognize it.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
}
-
static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -11456,7 +12159,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
-
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT T = Op.getValueType();
@@ -11595,6 +12297,43 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit() &&
+         "Custom FSINCOS lowering is only implemented for 64-bit Darwin");
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+                         CallingConv::C, /*isTailCall=*/false,
+                         /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11624,11 +12363,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
- case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
- case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -11674,6 +12415,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
@@ -11727,6 +12470,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
@@ -11760,7 +12504,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -11776,6 +12521,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
+ if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
@@ -11942,13 +12689,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
- case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
- case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
- case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
+ case X86ISD::UMAX: return "X86ISD::UMAX";
+ case X86ISD::UMIN: return "X86ISD::UMIN";
+ case X86ISD::SMAX: return "X86ISD::SMAX";
+ case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
@@ -12001,14 +12751,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::ANDN: return "X86ISD::ANDN";
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -12039,6 +12788,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
@@ -12047,6 +12797,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
+ case X86ISD::XTEST: return "X86ISD::XTEST";
}
}
@@ -12104,24 +12855,21 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
-
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
@@ -12129,9 +12877,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -12144,6 +12890,30 @@ bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
+bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (isZExtFree(VT1, VT2))
+ return true;
+
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // X86 has 8, 16, and 32-bit zero-extending loads.
+ return true;
+ }
+
+ return false;
+}
+
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
@@ -12164,15 +12934,15 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
+ isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
- isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, VT, Subtarget) ||
- isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
+ isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
}
bool
@@ -12185,8 +12955,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
}
return false;
}
@@ -12379,13 +13149,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) {
// to
//
// ...
-// EAX = LOAD MI.addr
+// t1 = LOAD MI.addr
// loop:
-// t1 = OP MI.val, EAX
-// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// t4 = phi(t1, t3 / loop)
+// t2 = OP MI.val, t4
+// EAX = t4
+// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+// t3 = EAX
// JNE loop
// sink:
-// dst = EAX
+// dst = t3
// ...
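+// Keeping the running value in virtual registers t1..t4 with an explicit PHI
+// (instead of holding it live in EAX around the loop) confines the physical
+// register constraint to the LCMPXCHG itself and leaves the rest to the
+// register allocator.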
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
@@ -12400,7 +13173,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -12422,7 +13195,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned t3 = MRI.createVirtualRegister(RC);
+ unsigned t4 = MRI.createVirtualRegister(RC);
+ unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
unsigned LOADOpc = getLoadOpcode(VT);
@@ -12430,12 +13207,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr]
+ // t1 = LOAD [MI.addr]
// mainMBB:
+ // t4 = phi(t1 / thisMBB, t3 / mainMBB)
-  //   t1 = OP MI.val, EAX
+  //   t2 = OP MI.val, t4
+ // EAX = t4
-  //   LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+  //   LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+ // t3 = EAX
// JNE mainMBB
// sinkMBB:
+ // dst = t3
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12451,23 +13232,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ }
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(AccPhyReg);
- // Copy AccPhyReg as it is used more than once.
- unsigned AccReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
- .addReg(AccPhyReg);
+ // Add a PHI.
+ MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- unsigned t1 = MRI.createVirtualRegister(RC);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
@@ -12485,20 +13277,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
case X86::ATOMXOR32:
case X86::ATOMXOR64: {
unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
- .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
+ .addReg(t4);
break;
}
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64: {
- unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned Tmp = MRI.createVirtualRegister(RC);
unsigned NOTOpc;
unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
- .addReg(AccReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
+ .addReg(t4);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
break;
}
case X86::ATOMMAX8:
@@ -12522,20 +13314,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
BuildMI(mainMBB, DL, TII->get(CMPOpc))
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
if (Subtarget->hasCMov()) {
if (VT != MVT::i8) {
// Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
} else {
// Promote i8 to i32 to use CMOV32
- const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC32 =
+ TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned t2 = MRI.createVirtualRegister(RC32);
+ unsigned Tmp = MRI.createVirtualRegister(RC32);
unsigned Undef = MRI.createVirtualRegister(RC32);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
@@ -12546,15 +13340,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
.addImm(X86::sub_8bit);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
.addReg(Undef)
- .addReg(AccReg)
+ .addReg(t4)
.addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
.addReg(SrcReg32)
.addReg(AccReg32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
- .addReg(t2, 0, X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
+ .addReg(Tmp, 0, X86::sub_8bit);
}
} else {
// Use pseudo select and lower them.
@@ -12563,36 +13357,47 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
unsigned SelOpc = getPseudoCMOVOpc(VT);
X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
- .addReg(SrcReg).addReg(AccReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
+ .addReg(SrcReg).addReg(t4)
.addImm(CC);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
+ Phi->eraseFromParent();
}
break;
}
}
- // Copy AccPhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
- .addReg(AccReg);
+ // Copy PhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
+ .addReg(t4);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.addReg(t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ MIB.addReg(t2);
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy PhyReg back to virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
+ .addReg(PhyReg);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(AccPhyReg);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstReg)
- .addReg(AccPhyReg);
+ .addReg(t3);
MI->eraseFromParent();
return sinkMBB;
@@ -12609,15 +13414,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// to
//
// ...
-// EAX = LOAD [MI.addr + 0]
-// EDX = LOAD [MI.addr + 4]
+// t1L = LOAD [MI.addr + 0]
+// t1H = LOAD [MI.addr + 4]
// loop:
-// EBX = OP MI.val.lo, EAX
-// ECX = OP MI.val.hi, EDX
+// t4L = phi(t1L, t3L / loop)
+// t4H = phi(t1H, t3H / loop)
+// t2L = OP MI.val.lo, t4L
+// t2H = OP MI.val.hi, t4H
+// EAX = t4L
+// EDX = t4H
+// EBX = t2L
+// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// t3L = EAX
+// t3H = EDX
// JNE loop
// sink:
-// dst = EDX:EAX
+// dstL = t3L
+// dstH = t3H
// ...
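+// LCMPXCHG8B pins its operands: the expected value must be in EDX:EAX and the
+// replacement in ECX:EBX, so the virtual values are copied into those
+// registers right before the instruction.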
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
@@ -12632,7 +13446,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -12658,20 +13472,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
const TargetRegisterClass *RC = &X86::GR32RegClass;
const TargetRegisterClass *RC8 = &X86::GR8RegClass;
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ unsigned t3L = MRI.createVirtualRegister(RC);
+ unsigned t3H = MRI.createVirtualRegister(RC);
+ unsigned t4L = MRI.createVirtualRegister(RC);
+ unsigned t4H = MRI.createVirtualRegister(RC);
+
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr + 0]
- // EDX = LOAD [MI.addr + 4]
+ // t1L = LOAD [MI.addr + 0]
+ // t1H = LOAD [MI.addr + 4]
// mainMBB:
- // EBX = OP MI.vallo, EAX
- // ECX = OP MI.valhi, EDX
+ // t4L = phi(t1L / thisMBB, t3L / mainMBB)
+ // t4H = phi(t1H / thisMBB, t3H / mainMBB)
+ // t2L = OP MI.val.lo, t4L
+ // t2H = OP MI.val.hi, t4H
+ // EBX = t2L
+ // ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // JNE mainMBB
+ // t3L = EAX
+ // t3H = EDX
+  //   JNE mainMBB
// sinkMBB:
+ // dstL = t3L
+ // dstH = t3H
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12688,35 +13519,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
// thisMBB:
// Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+  }
+ MachineInstr *LowMI = MIB;
+
// Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp)
+ if (i == X86::AddrDisp) {
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- else
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ } else {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(X86::EAX);
- mainMBB->addLiveIn(X86::EDX);
-
- // Copy EDX:EAX as they are used more than once.
- unsigned LoReg = MRI.createVirtualRegister(RC);
- unsigned HiReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
+ // Add PHIs.
+ MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -12729,19 +13575,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
+ .addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
+ .addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
unsigned HiOpc, NOTOpc;
unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ unsigned TmpL = MRI.createVirtualRegister(RC);
+ unsigned TmpH = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
+ .addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
+ .addReg(t4H);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
break;
}
case X86::ATOMMAX6432:
@@ -12757,12 +13607,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
unsigned cc = MRI.createVirtualRegister(RC);
// cl := cmp src_lo, lo
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(LoReg);
+ .addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
// ch := cmp src_hi, hi
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(HiReg);
+ .addReg(SrcHiReg).addReg(t4H);
BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
// cc := if (src_hi == hi) ? cl : ch;
@@ -12777,58 +13627,74 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
}
BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
- .addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
- .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
+ .addReg(SrcLoReg).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
+ .addReg(SrcHiReg).addReg(t4H);
} else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
- .addReg(SrcLoReg).addReg(LoReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
+ .addReg(SrcLoReg).addReg(t4L)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
- .addReg(SrcHiReg).addReg(HiReg)
+      // As the lowered CMOV won't clobber EFLAGS, we can reuse it for the
+      // 2nd CMOV lowering.
+ mainMBB->addLiveIn(X86::EFLAGS);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
+ .addReg(SrcHiReg).addReg(t4H)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
+ PhiL->eraseFromParent();
+ PhiH->eraseFromParent();
}
break;
}
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
break;
}
}
-  // Copy EDX:EAX back from HiReg:LoReg
+  // Copy t4H:t4L into EDX:EAX, the value LCMPXCHG8B expects to find in memory.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
-  // Copy ECX:EBX from t1H:t1L
+  // Copy t2H:t2L into ECX:EBX, the replacement value for LCMPXCHG8B.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy EDX:EAX back to t3H:t3L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(X86::EAX);
- sinkMBB->addLiveIn(X86::EDX);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(X86::EAX);
+ .addReg(t3L);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(X86::EDX);
+ .addReg(t3H);
MI->eraseFromParent();
return sinkMBB;
@@ -13239,7 +14105,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MBB->addSuccessor(EndMBB);
}
- unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -14203,6 +15069,18 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
Ld->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as Ld in
+ // terms of dependency. We create a TokenFactor for Ld and ResNode,
+ // and update uses of Ld's output chain to use the TokenFactor.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
}
@@ -14248,7 +15126,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.is256BitVector() &&
+ if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
@@ -14266,127 +15144,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-
-/// PerformTruncateCombine - Converts truncate operation to
-/// a sequence of vector shuffle operations.
-/// It is possible when we truncate 256-bit vector to 128-bit vector
+/// PerformTruncateCombine - Do target-specific DAG combines on truncate
+/// operations. The shuffle-based 256-bit-to-128-bit lowering has been
+/// removed, so this currently does nothing.
static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
-
- if (Subtarget->hasAVX2()) {
- // AVX2: v4i64 -> v4i32
-
- // VPERMD
- static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op);
- Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32),
- ShufMask);
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
- DAG.getIntPtrConstant(0));
- }
-
- // AVX: v4i64 -> v4i32
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(2));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // PSHUFD
- static const int ShufMask1[] = {0, 2, 0, 0};
-
- SDValue Undef = DAG.getUNDEF(VT);
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
- }
-
- if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
-
- if (Subtarget->hasAVX2()) {
- // AVX2: v8i32 -> v8i16
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
-
- // PSHUFB
- SmallVector<SDValue,32> pshufbMask;
- for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
- for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- }
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8,
- &pshufbMask[0], 32);
- Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV);
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op);
-
- static const int ShufMask[] = {0, 2, -1, -1};
- Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64),
- &ShufMask[0]);
-
- Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
- }
-
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(4));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
-
- // PSHUFB
- static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1};
-
- SDValue Undef = DAG.getUNDEF(MVT::v16i8);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
- }
-
return SDValue();
}
@@ -14581,6 +15344,76 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Matches a VSELECT onto min/max or returns 0 if the node doesn't match.
+static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (!VT.isVector())
+ return 0;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ if (!Subtarget->hasAVX2())
+ return 0;
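+    // Fall through: the 256-bit types must also pass the SSE2 check below,
+    // which any AVX2-capable target does.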
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ if (!Subtarget->hasSSE2())
+ return 0;
+ }
+
+ // SSE2 has only a small subset of the operations.
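+  // (PMINUB/PMAXUB cover unsigned v16i8 and PMINSW/PMAXSW signed v8i16;
+  // every other element type needs SSE4.1's PMIN*/PMAX* family.)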
+ bool hasUnsigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v16i8);
+ bool hasSigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v8i16);
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ }
+ }
+
+ return 0;
+}
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -14861,6 +15694,67 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Match VSELECTs into subs with unsigned saturation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
+ ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on the
+ // left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other->getNumOperands() == 2 &&
+ DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> subus x, y
+ // x > y ? x-y : 0 --> subus x, y
+ if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
+ Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+
+ // If the RHS is a constant we have to reverse the const canonicalization.
+      // x > C-1 ? x+(-C) : 0 --> subus x, C
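+      // (E.g. "x >= 6 ? x-6 : 0" arrives here as "x > 5 ? x+(-6) : 0", so the
+      // subtrahend is recovered as the negation of the add constant.)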
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(-A, VT));
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (A.isSignBit())
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Try to match a min/max vector operation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
+ if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
+ return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -14935,8 +15829,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// Quit if the constant is neither 0 or 1.
return SDValue();
- // Skip 'zext' node.
- if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ // Skip 'zext' or 'trunc' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE)
SetCC = SetCC.getOperand(0);
switch (SetCC.getOpcode()) {
@@ -14955,9 +15850,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
- // A special case for rdrand, where 0 is set if false cond is found.
SDValue Op = SetCC.getOperand(0);
- if (Op.getOpcode() != X86ISD::RDRAND)
+ // Skip 'zext' or 'trunc' node.
+ if (Op.getOpcode() == ISD::ZERO_EXTEND ||
+ Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+    // A special case for rdrand/rdseed, whose result is known to be 0 when
+    // the condition is false.
+ if ((Op.getOpcode() != X86ISD::RDRAND &&
+ Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
@@ -15137,7 +16038,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
- dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
@@ -15158,7 +16059,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
@@ -15247,7 +16147,6 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
}
}
-
// Hardware support for vector shifts is sparse which makes us scalarize the
// vector operations in many cases. Also, on sandybridge ADD is faster than
// shl.
@@ -15271,127 +16170,14 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
- // On X86 with SSE2 support, we can transform this to a vector shift if
- // all elements are shifted by the same amount. We can't do this in legalize
- // because the a constant vector is typically transformed to a constant pool
- // so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
- return SDValue();
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasAVX2() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue ShAmtOp = N->getOperand(1);
- EVT EltVT = VT.getVectorElementType();
- DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt = SDValue();
- if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- // Handle the case where the build_vector is all undef
- // FIXME: Should DAG allow this?
- if (i == NumElts)
- return SDValue();
-
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != BaseShAmt) {
- return SDValue();
- }
- }
- } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- SDValue InVec = ShAmtOp.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
- if (C->getZExtValue() == SplatIdx)
- BaseShAmt = InVec.getOperand(1);
- }
- }
- if (BaseShAmt.getNode() == 0) {
- // Don't create instructions with illegal types after legalize
- // types has run.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
- !DCI.isBeforeLegalize())
- return SDValue();
-
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
- }
- } else
- return SDValue();
-
- // The shift amount is an i32.
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
- // The shift amount is identical so we can do a vector shift.
- SDValue ValOp = N->getOperand(0);
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- }
+ return SDValue();
}
-
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
// and friends. Likewise for OR -> CMPNEQSS.
@@ -15420,8 +16206,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end();
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
@@ -15500,9 +16285,92 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
return false;
}
+// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM-sized
+// register. In most cases we actually compare or select YMM-sized registers
+// and mixing the two types creates horrible code. This method optimizes
+// some of the transition sequences.
+static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.is256BitVector())
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+
+ SDValue Narrow = N->getOperand(0);
+ EVT NarrowVT = Narrow->getValueType(0);
+ if (!NarrowVT.is128BitVector())
+ return SDValue();
+
+ if (Narrow->getOpcode() != ISD::XOR &&
+ Narrow->getOpcode() != ISD::AND &&
+ Narrow->getOpcode() != ISD::OR)
+ return SDValue();
+
+ SDValue N0 = Narrow->getOperand(0);
+ SDValue N1 = Narrow->getOperand(1);
+ DebugLoc DL = Narrow->getDebugLoc();
+
+ // The left side has to be a trunc.
+ if (N0.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ // The type of the truncated inputs.
+ EVT WideVT = N0->getOperand(0)->getValueType(0);
+ if (WideVT != VT)
+ return SDValue();
+
+ // The right side has to be a 'trunc' or a constant vector.
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+ bool RHSConst = (isSplatVector(N1.getNode()) &&
+ isa<ConstantSDNode>(N1->getOperand(0)));
+ if (!RHSTrunc && !RHSConst)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+ return SDValue();
+
+ // Set N0 and N1 to hold the inputs to the new wide operation.
+ N0 = N0->getOperand(0);
+ if (RHSConst) {
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1->getOperand(0));
+ SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+ } else if (RHSTrunc) {
+ N1 = N1->getOperand(0);
+ }
+
+ // Generate the wide operation.
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ return Op;
+ case ISD::ZERO_EXTEND: {
+ unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(InBits);
+ Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ Op, DAG.getConstant(Mask, VT));
+ }
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+ Op, DAG.getValueType(NarrowVT));
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
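
The rewrite can be sanity-checked on a single lane (an illustrative C++ sketch, assuming i32 stands in for the wide element and i16 for the narrow one): performing the logic operation before or after the narrowing yields the same sign-extended result.

#include <cassert>
#include <cstdint>

// sext(trunc(a) & trunc(b)) computed at the narrow type...
static int32_t viaNarrow(int32_t a, int32_t b) {
  int16_t na = static_cast<int16_t>(a); // trunc
  int16_t nb = static_cast<int16_t>(b);
  return static_cast<int32_t>(static_cast<int16_t>(na & nb)); // sext
}

// ...equals the operation at the wide type, then sign_extend_inreg.
static int32_t viaWide(int32_t a, int32_t b) {
  return static_cast<int32_t>(static_cast<int16_t>(a & b));
}

int main() {
  assert(viaNarrow(0x12348765, 0x0000FFFF) == viaWide(0x12348765, 0x0000FFFF));
  assert(viaNarrow(-1, 0x7FFF) == viaWide(-1, 0x7FFF));
}
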
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15510,9 +16378,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
- // Create ANDN, BLSI, and BLSR instructions
+ // Create BLSI and BLSR instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
@@ -15520,13 +16386,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
- // Check LHS for not
- if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
- // Check RHS for not
- if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
-
// Check LHS for neg
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
isZero(N0.getOperand(0)))
@@ -15579,6 +16438,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15586,15 +16446,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
if (!Subtarget->hasSSSE3() ||
- (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ (VT == MVT::v4i64 && !Subtarget->hasInt256()))
return SDValue();
// Canonicalize pandn to RHS
@@ -15628,13 +16486,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- if (Mask.getOpcode() != X86ISD::VSRAI)
- return SDValue();
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(1);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
if ((SraAmt + 1) != EltBits)
return SDValue();
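
Why the combine insists on SraAmt + 1 == EltBits: an arithmetic shift right by EltBits-1 smears the sign bit across the whole lane, so the mask is all ones or all zeros. A one-lane sketch (plain C++, assuming the usual arithmetic behavior of >> on signed values):

#include <cassert>
#include <cstdint>

static uint16_t signMask(int16_t x) {
  return static_cast<uint16_t>(x >> 15); // 0xFFFF if negative, 0 otherwise
}

int main() {
  assert(signMask(-5) == 0xFFFF);
  assert(signMask(42) == 0x0000);
}
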
@@ -15762,6 +16626,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15775,8 +16640,6 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasBMI())
return SDValue();
- EVT VT = N->getValueType(0);
-
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
@@ -15807,23 +16670,61 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
DebugLoc dl = Ld->getDebugLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
+ // On Sandy Bridge, unaligned 256-bit loads are inefficient.
ISD::LoadExtType Ext = Ld->getExtensionType();
+ unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ NumElems/2);
+ SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ std::min(16U, Alignment));
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load1.getValue(1),
+ Load2.getValue(1));
+
+ SDValue NewVec = DAG.getUNDEF(RegVT);
+ NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
+ NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, NewVec, TF, true);
+ }
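
Expressed with plain AVX intrinsics rather than SelectionDAG nodes, the split above amounts to roughly the following (a sketch; float lanes chosen arbitrarily):

#include <immintrin.h>

// One unaligned 256-bit load becomes two 128-bit loads plus an insert,
// which Sandy Bridge executes more efficiently.
__m256 load256_split(const float *p) {
  __m128 lo = _mm_loadu_ps(p);     // first 16 bytes
  __m128 hi = _mm_loadu_ps(p + 4); // second 16 bytes
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
}
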
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSSE3 shuffles.
+ // shuffle. If SSSE3 is not available, we may emit an illegal shuffle, but
+ // the expansion is still better than scalar code.
+ // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
+ // emit a shuffle and an arithmetic shift.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
+ if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
+ (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
- unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+ return SDValue();
+
// All sizes must be a power of two.
if (!isPowerOf2_32(RegSz * MemSz * NumElems))
return SDValue();
@@ -15847,16 +16748,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+ if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+ return SDValue();
+
+ unsigned loadRegSize = RegSz;
+ if (Ext == ISD::SEXTLOAD && RegSz == 256)
+ loadRegSize /= 2;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
- RegSz/SclrLoadTy.getSizeInBits());
+ loadRegSize/SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we "widen" MemVT.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- RegSz/MemVT.getScalarType().getSizeInBits());
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ loadRegSize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -15897,6 +16805,39 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1 we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ return DCI.CombineTo(N, Sext, TF, true);
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
+ return SDValue();
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
+
+ return DCI.CombineTo(N, Shuff, TF, true);
+ }
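
Per lane, the shuffle-plus-shift expansion behaves like this sketch (plain C++; it assumes >> on a negative signed value is an arithmetic shift, as it is on the targets in question):

#include <cassert>
#include <cstdint>

// Place the loaded i16 in the high half of the i32 lane (the shuffle),
// then shift right arithmetically (the SRA) to sign-extend it.
static int32_t sextViaShift(uint16_t loaded) {
  int32_t hi = static_cast<int32_t>(static_cast<uint32_t>(loaded) << 16);
  return hi >> 16;
}

int main() {
  assert(sextViaShift(0x8000) == -32768);
  assert(sextViaShift(0x7FFF) == 32767);
}
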
+
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
@@ -15930,11 +16871,16 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
- if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
+ if (VT.is256BitVector() && !Subtarget->hasInt256() &&
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -15942,10 +16888,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::min(16U, Alignment));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
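
The store side mirrors the load split; in intrinsic form it is roughly (sketch only):

#include <immintrin.h>

// One unaligned 256-bit store becomes two 128-bit stores.
void store256_split(float *p, __m256 v) {
  _mm_storeu_ps(p, _mm256_castps256_ps128(v));       // low 128 bits
  _mm_storeu_ps(p + 4, _mm256_extractf128_ps(v, 1)); // high 128 bits
}
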
@@ -16030,7 +16977,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Chains.size());
}
-
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
@@ -16041,8 +16987,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
@@ -16278,7 +17224,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -16293,7 +17239,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal subs from subs of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -16336,7 +17282,6 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
-
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -16382,58 +17327,57 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
return SDValue();
- EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
DebugLoc dl = N->getDebugLoc();
- if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
- (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
-
- if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
-
- // Optimize vectors in AVX mode
- // Sign extend v8i16 to v8i32 and
- // v4i32 to v4i64
- //
- // Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
- // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
- // concat the vectors to original VT
-
- unsigned NumElems = OpVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(OpVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
-
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
+ // The SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE
+ // and AVX2 since there is no sign-extending shift-right operation on a
+ // vector with 64-bit elements.
+ // (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
+ //   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
+ if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
- SmallVector<int,8> ShufMask2(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
+ // EXTLOAD has a better solution on AVX2: it may be replaced with an
+ // X86ISD::VSEXT node.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (!ISD::isNormalLoad(N00.getNode()))
+ return SDValue();
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
+ if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+ SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+ N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+ }
+ }
+ return SDValue();
+}
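
The equivalence this combine relies on can be checked per lane (illustrative C++; i64 stands in for a v4i64 lane, i16 for ExtraVT, and the anyext is modeled as a plain zero extension):

#include <cassert>
#include <cstdint>
#include <initializer_list>

static int64_t sextInReg16From64(int64_t v) { return static_cast<int16_t>(v); }
static int32_t sextInReg16From32(int32_t v) { return static_cast<int16_t>(v); }

int main() {
  for (int32_t x : {0x12345678, -1, 0x8000, 0x7FFF}) {
    int64_t anyext = static_cast<uint32_t>(x); // any_extend of the i32 lane
    // sext_in_reg at i64 == sext of the sext_in_reg done at i32.
    assert(sextInReg16From64(anyext) ==
           static_cast<int64_t>(sextInReg16From32(x)));
  }
}
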
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
- VT.getVectorNumElements()/2);
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
- OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+ if (!Subtarget->hasFp256())
+ return SDValue();
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
+
return SDValue();
}
@@ -16487,58 +17431,26 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- EVT OpVT = N0.getValueType();
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() != X86ISD::SETCC_CARRY)
- return SDValue();
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C || C->getZExtValue() != 1)
- return SDValue();
- return DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
- N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, VT));
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
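
A scalar model of the zext-of-SETCC_CARRY rewrite above (a sketch, assuming an i8 to i32 widening): instead of widening the masked 0/1 result, re-materialize the all-ones/zero carry value at the wide type and mask there.

#include <cassert>
#include <cstdint>

static uint32_t beforeCombine(bool carry) {
  uint8_t setcc = carry ? 0xFF : 0x00;    // SETCC_CARRY: all ones or zero
  return static_cast<uint8_t>(setcc & 1); // zext(and(setcc, 1))
}

static uint32_t afterCombine(bool carry) {
  uint32_t setcc = carry ? 0xFFFFFFFFu : 0u; // SETCC_CARRY at the wide type
  return setcc & 1;                          // and at the wide type
}

int main() {
  assert(beforeCombine(true) == afterCombine(true));
  assert(beforeCombine(false) == afterCombine(false));
}
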
}
- // Optimize vectors in AVX mode:
- //
- // v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
- // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
- // Concat upper and lower parts.
- //
- // v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
- // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
- // Concat upper and lower parts.
- //
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
- return SDValue();
-
- if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
- ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
-
- if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
-
- SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
- SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec);
- SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec);
-
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ if (VT.is256BitVector()) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
return SDValue();
@@ -16570,8 +17482,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It materializes "setb reg"
+// as "sbb reg,reg", since that can be extended without a zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
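
For context, a scalar model of why this materialization pays off (plain C++ sketch): with CF set, "sbb reg,reg" computes reg - reg - CF = -1, an all-ones value that needs no further zero extension.

#include <cassert>
#include <cstdint>

// "sbb reg,reg": 0 when CF is clear, 0xFFFFFFFF when CF is set.
static uint32_t sbbSameReg(uint32_t r, bool carry) {
  return r - r - static_cast<uint32_t>(carry);
}

int main() {
  assert(sbbSameReg(123, true) == 0xFFFFFFFFu);
  assert(sbbSameReg(123, false) == 0u);
}
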
@@ -16589,13 +17501,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
@@ -16751,7 +17663,7 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
@@ -16784,7 +17696,7 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
@@ -16803,7 +17715,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -16841,13 +17754,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG:
+ return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -17030,7 +17944,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17048,7 +17962,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17074,8 +17988,6 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
-
-
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
@@ -17152,17 +18064,17 @@ TargetLowering::ConstraintWeight
case 'f':
case 't':
case 'u':
- if (type->isFloatingPointTy())
- weight = CW_SpecificReg;
- break;
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
case 'y':
- if (type->isX86_MMXTy() && Subtarget->hasMMX())
- weight = CW_SpecificReg;
- break;
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
case 'x':
case 'Y':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
weight = CW_Register;
break;
case 'I':
@@ -17530,7 +18442,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17543,7 +18455,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17560,7 +18472,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17598,207 +18510,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return Res;
}
-
-//===----------------------------------------------------------------------===//
-//
-// X86 cost model.
-//
-//===----------------------------------------------------------------------===//
-
-struct X86CostTblEntry {
- int ISD;
- MVT Type;
- unsigned Cost;
-};
-
-static int
-FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-struct X86TypeConversionCostTblEntry {
- int ISD;
- MVT Dst;
- MVT Src;
- unsigned Cost;
-};
-
-static int
-FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
- int ISD, MVT Dst, MVT Src) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-unsigned
-X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry AVX1CostTable[] = {
- // We don't have to scalarize unsupported ops. We can issue two half-sized
- // operations and we only need to extract the upper YMM half.
- // Two ops + 1 extract + 1 insert = 4.
- { ISD::MUL, MVT::v8i32, 4 },
- { ISD::SUB, MVT::v8i32, 4 },
- { ISD::ADD, MVT::v8i32, 4 },
- { ISD::MUL, MVT::v4i64, 4 },
- { ISD::SUB, MVT::v4i64, 4 },
- { ISD::ADD, MVT::v4i64, 4 },
- };
-
- // Look for AVX1 lowering tricks.
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
- LT.second);
- if (Idx != -1)
- return LT.first * AVX1CostTable[Idx].Cost;
- }
- // Fallback to the default implementation.
- return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
-}
-
-unsigned
-X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- assert(Val->isVectorTy() && "This must be a vector type");
-
- if (Index != -1U) {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
-
- // This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
-
- // The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
-
- // Floating point scalars are already located in index #0.
- if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
- return 0;
- }
-
- return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
-}
-
-unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- MVT MTy = LT.second;
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST =
- TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry SSE42CostTbl[] = {
- { ISD::SETCC, MVT::v2f64, 1 },
- { ISD::SETCC, MVT::v4f32, 1 },
- { ISD::SETCC, MVT::v2i64, 1 },
- { ISD::SETCC, MVT::v4i32, 1 },
- { ISD::SETCC, MVT::v8i16, 1 },
- { ISD::SETCC, MVT::v16i8, 1 },
- };
-
- static const X86CostTblEntry AVX1CostTbl[] = {
- { ISD::SETCC, MVT::v4f64, 1 },
- { ISD::SETCC, MVT::v8f32, 1 },
- // AVX1 does not support 8-wide integer compare.
- { ISD::SETCC, MVT::v4i64, 4 },
- { ISD::SETCC, MVT::v8i32, 4 },
- { ISD::SETCC, MVT::v16i16, 4 },
- { ISD::SETCC, MVT::v32i8, 4 },
- };
-
- static const X86CostTblEntry AVX2CostTbl[] = {
- { ISD::SETCC, MVT::v4i64, 1 },
- { ISD::SETCC, MVT::v8i32, 1 },
- { ISD::SETCC, MVT::v16i16, 1 },
- { ISD::SETCC, MVT::v32i8, 1 },
- };
-
- if (ST.hasSSE42()) {
- int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX2()) {
- int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
- Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
-
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
- };
-
- if (ST.hasAVX()) {
- int Idx = FindInConvertTable(AVXConversionTbl,
- array_lengthof(AVXConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVXConversionTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-}
-
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 465c6036ada6..5725f7aea581 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,15 +15,14 @@
#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H
-#include "X86Subtarget.h"
-#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetOptions.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
namespace llvm {
namespace X86ISD {
@@ -176,13 +175,14 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLENDV - Blend where the selector is an XMM.
+ /// BLENDV - Blend where the selector is a register.
BLENDV,
- /// BLENDxx - Blend where the selector is an immediate.
- BLENDPW,
- BLENDPS,
- BLENDPD,
+ /// BLENDI - Blend where the selector is an immediate.
+ BLENDI,
+
+ /// SUBUS - Integer sub with unsigned saturation.
+ SUBUS,
/// HADD - Integer horizontal add.
HADD,
@@ -196,6 +196,12 @@ namespace llvm {
/// FHSUB - Floating point horizontal sub.
FHSUB,
+ /// UMAX, UMIN - Unsigned integer max and min.
+ UMAX, UMIN,
+
+ /// SMAX, SMIN - Signed integer max and min.
+ SMAX, SMIN,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -228,11 +234,8 @@ namespace llvm {
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
+ /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
TC_RETURN,
// VZEXT_MOVL - Vector move low and zero extend.
@@ -272,8 +275,6 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
-
BLSI, // BLSI - Extract lowest set isolated bit
BLSMSK, // BLSMSK - Get mask up to lowest set bit
BLSR, // BLSR - Reset lowest set bit
@@ -290,7 +291,7 @@ namespace llvm {
TESTP,
// Several flavors of instructions with vector shuffle behaviors.
- PALIGN,
+ PALIGNR,
PSHUFD,
PSHUFHW,
PSHUFLW,
@@ -355,10 +356,17 @@ namespace llvm {
// RDRAND - Get a random integer and indicate whether it is valid in CF.
RDRAND,
+ // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // indicate whether it is valid in CF.
+ RDSEED,
+
// PCMP*STRI
PCMPISTRI,
PCMPESTRI,
+ // XTEST - Test if in transactional execution.
+ XTEST,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -470,7 +478,7 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
@@ -496,23 +504,29 @@ namespace llvm {
/// lowering. If DstAlign is zero, it is safe to assume that the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero,
/// there is no need to check it against an alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// specified type to expand memcpy / memset inline. This is mostly true
+ /// for all types except for some special cases. For example, on X86
+ /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+ /// also does type conversion. Note the specified type doesn't have to be
+ /// legal as the hook is used before type legalization.
+ virtual bool isSafeMemOpType(MVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
- return true;
- }
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -630,6 +644,7 @@ namespace llvm {
/// result out to 64 bits.
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
@@ -710,7 +725,7 @@ namespace llvm {
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
@@ -783,9 +798,7 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -800,18 +813,18 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
@@ -828,8 +841,9 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
@@ -838,7 +852,7 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -861,9 +875,8 @@ namespace llvm {
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
- virtual EVT
- getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
- ISD::NodeType ExtendKind) const;
+ virtual MVT
+ getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -932,23 +945,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-
- class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
- public:
- explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
- VectorTargetTransformImpl(TL) {}
-
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
-
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
-
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
- };
}
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 54b91c3edb8b..ba1aede3c1a0 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -84,15 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
-def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
-def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
- "prefetch $addr", []>;
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
+ "prefetch\t$addr",
+ [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>;
-// FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in
-def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
- "prefetchw $addr", []>;
+def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB,
+ Requires<[HasPrefetchW]>;
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f790611b8f8c..225e9720da0c 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -29,48 +29,59 @@ def LEA32r : I<0x8D, MRMSrcMem,
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>,
Requires<[In64BitMode]>;
let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for an instruction that loads one value and gets the
+// second (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ "mul{w}\t$src",
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -158,7 +180,7 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(X86smul_flag GR16:$src1, (load addr:$src2)))],
IIC_IMUL16_RM>,
TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
@@ -172,18 +194,20 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))],
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))],
IIC_IMUL16_RRI>, OpSize;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,12 +285,15 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
// unsigned division/remainder
+let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -279,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -310,21 +344,27 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
+} // hasSideEffects = 1
//===----------------------------------------------------------------------===//
// Two address Instructions.
@@ -333,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -349,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -366,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -386,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -400,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -412,11 +457,11 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -430,31 +475,31 @@ def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
// In 64-bit mode, single byte INC and DEC cannot be encoded.
let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -469,7 +514,7 @@ let CodeSize = 2 in {
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-
+
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
@@ -489,21 +534,21 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -512,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -530,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -544,57 +589,57 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
/// VT - This is the value type itself.
ValueType VT = vt;
-
+
/// InstrSuffix - This is the suffix used on instructions with this type. For
/// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
string InstrSuffix = instrsuffix;
-
+
/// RegClass - This is the register class associated with this type. For
/// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
RegisterClass RegClass = regclass;
-
+
/// LoadNode - This is the load node associated with this type. For
/// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
PatFrag LoadNode = loadnode;
-
+
/// MemOperand - This is the memory operand associated with this type. For
/// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
X86MemOperand MemOperand = memoperand;
-
+
/// ImmEncoding - This is the encoding of an immediate of this type. For
/// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
  /// since the immediate field of i64 instructions is a 32-bit sign-extended
  /// value.
ImmType ImmEncoding = immkind;
-
+
/// ImmOperand - This is the operand kind of an immediate of this type. For
/// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
  /// i64i32imm since the immediate field of i64 instructions is a 32-bit
  /// sign-extended value.
Operand ImmOperand = immoperand;
-
+
/// ImmOperator - This is the operator that should be used to match an
/// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
SDPatternOperator ImmOperator = immoperator;
-
+
/// Imm8Operand - This is the operand kind to use for an imm8 of this type.
/// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
/// only used for instructions that have a sign-extended imm8 field form.
Operand Imm8Operand = imm8operand;
-
+
/// Imm8Operator - This is the operator that should be used to match an 8-bit
  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
SDPatternOperator Imm8Operator = imm8operator;
-
+
/// HasOddOpcode - This bit is true if the instruction should have an odd (as
/// opposed to even) opcode. Operations on i8 are usually even, operations on
/// other datatypes are odd.
bit HasOddOpcode = hasOddOpcode;
-
+
/// HasOpSizePrefix - This bit is set to true if the instruction should have
/// the 0x66 operand size prefix. This is set for i16 types.
bit HasOpSizePrefix = hasOpSizePrefix;
-
+
/// HasREX_WPrefix - This bit is set to true if the instruction should have
  /// the REX.W prefix (0x48). This is set for i64 types.
bit HasREX_WPrefix = hasREX_WPrefix;
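For reference, each operand width gets one concrete instantiation of this class, with the template arguments following the field order documented above. The Xi64 form is visible in the next hunk header; the Xi8 form shown alongside it here is a reconstruction on the same pattern:

  def Xi8  : X86TypeInfo<i8,  "b", GR8,  loadi8,  i8mem,
                         Imm8,  i8imm,     imm,          i8imm,    invalid_node,
                         0, 0, 0>;
  def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
                         Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
                         1, 0, 1>;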
@@ -624,12 +669,12 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
/// 3. Infers whether the instruction should have a REX.W prefix.
/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
string mnemonic, string args, list<dag> pattern,
InstrItinClass itin = IIC_BIN_NONMEM>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
+ f, outs, ins,
!strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
itin> {
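The bit splice in this base class copies the upper seven opcode bits and substitutes typeinfo.HasOddOpcode for bit 0, so one definition per width can share a single base opcode. For example, matching the hardware ADD encodings (0x00 for the byte form, 0x01 for the wider forms):

  // ITy<0x00, ..., Xi8,  ...> emits opcode 0x00  (HasOddOpcode = 0)
  // ITy<0x00, ..., Xi32, ...> emits opcode 0x01  (HasOddOpcode = 1)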
@@ -644,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -687,18 +733,22 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRM - Instructions like "add reg, reg, [mem]".
@@ -706,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -742,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -764,13 +816,13 @@ class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
EFLAGS))]>;
@@ -779,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -789,7 +842,7 @@ class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
+
// BinOpRI8_F - Instructions like "cmp reg, imm8".
class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -817,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -845,21 +899,22 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
// BinOpMI_RMW - Instructions like "add [mem], imm".
class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
(implicit EFLAGS)]>;
@@ -867,7 +922,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src))],
opcode>;
@@ -877,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -909,10 +965,11 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []> {
+ mnemonic, operands, []>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
+ let hasSideEffects = 0;
}
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
@@ -928,61 +985,61 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
- def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
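A single defm therefore stamps out the whole matrix of rr, rr_REV, rm, ri, ri8, mr, mi, mi8, and accumulator forms, with NAME# splicing the mnemonic into each record name (ADD8rr, ADD64ri32, and so on). Reconstructed from the defm lines further down this file:

  defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
                           X86add_flag, add, 1, 1>;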
/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
@@ -999,61 +1056,61 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
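The RFF variant covers the operations that read EFLAGS as well as write it, i.e. the carry-consuming adc and sbb. Reconstructed usage on the same pattern as above:

  defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m,
                            X86adc_flag, 1, 0>;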
/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
@@ -1067,60 +1124,60 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
- def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
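The F variant is for operations whose only architectural result is EFLAGS, so no destination register is defined. Reconstructed usage:

  defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;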
@@ -1180,7 +1237,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
-
+
def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
"{$src, %al|AL, $src}">;
def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
@@ -1194,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", [], IIC_BIN_NONMEM>;
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1204,12 +1261,13 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
- IIC_BIN_NONMEM>;
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1217,6 +1275,17 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
}
+let Predicates = [HasBMI] in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
@@ -1225,12 +1294,12 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>;
}
}
@@ -1240,3 +1309,53 @@ let Predicates = [HasBMI2] in {
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// ADCX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize;
+
+ def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize;
+
+ def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ADOX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS;
+
+ def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS;
+
+ def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ }
+}
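ADCX and ADOX are defined without patterns because their value is micro-architectural: ADCX reads and writes only CF, and ADOX only OF, so two long-addition chains can be interleaved without serializing on a single carry flag. For illustration (hypothetical register choices):

  // adcx %r8, %rax   // chain 1 carries through CF
  // adox %r9, %rbx   // chain 2 carries through OF, independent of chain 1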
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index adeaf5410dcc..a967a4da5cf7 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,20 +16,20 @@
// SetCC instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1 in {
- def #NAME#16rr
+ isCommutable = 1, SchedRW = [WriteALU] in {
+ def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
IIC_CMOV16_RR>,TB,OpSize;
- def #NAME#32rr
+ def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
IIC_CMOV32_RR>, TB;
- def #NAME#64rr
+ def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
@@ -37,19 +37,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
IIC_CMOV32_RR>, TB;
}
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
- def #NAME#16rm
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
+ def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV16_RM>,
TB, OpSize;
- def #NAME#32rm
+ def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
- def #NAME#64rm
+ def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
- IIC_SET_R>, TB;
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)],
- IIC_SET_M>, TB;
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
} // Uses = [EFLAGS]
}
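Each condition code then instantiates these multiclasses once; reconstructed from the defm lines that follow in this file:

  defm CMOVE : CMOV<0x44, "cmove", X86_COND_E>;  // CMOVE16rr/32rr/64rr and the rm forms
  defm SETE  : SETCC<0x94, "sete", X86_COND_E>;  // SETEr and SETEm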
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 9e6f27988f71..d9ff0c63c55f 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
+let SchedRW = [WriteSystem] in {
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)], IIC_RET>;
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)], IIC_RET>;
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}
}
+} // SchedRW
let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
@@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +231,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +248,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)],
- IIC_ALU_NONMEM>;
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
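These pseudos lower to the classic sbb idiom: subtracting a register from itself with borrow yields 0 when CF is clear and -1 (all ones) when CF is set, which is exactly the X86setcc_c result. As a sketch of the expansion (pseudo notation):

  // SETB_C32r $dst  ==>  sbb $dst, $dst   // $dst = CF ? -1 : 0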
@@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
+let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
@@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -513,18 +518,22 @@ def CMOV_RFP80 : I<0, Pseudo,
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- def #NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- def #NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- def #NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- def #NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AL] in
+ def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
+ (ins i8mem:$ptr, GR8:$val),
+ !strconcat(mnemonic, "8 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AX] in
+ def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
+ (ins i16mem:$ptr, GR16:$val),
+ !strconcat(mnemonic, "16 PSEUDO!"), []>;
+ let Defs = [EFLAGS, EAX] in
+ def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+ (ins i32mem:$ptr, GR32:$val),
+ !strconcat(mnemonic, "32 PSEUDO!"), []>;
+ let Defs = [EFLAGS, RAX] in
+ def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+ (ins i64mem:$ptr, GR64:$val),
+ !strconcat(mnemonic, "64 PSEUDO!"), []>;
}
}
@@ -559,10 +568,11 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in
- def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
+ let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+ def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ !strconcat(mnemonic, "6432 PSEUDO!"), []>;
}
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
@@ -589,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"or{l}\t{$zero, $dst|$dst, $zero}",
- [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
+ Sched<[WriteALULd, WriteRMW]>;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
- [(X86MemBarrier)]>;
+ [(X86MemBarrier)]>, Sched<[WriteLoad]>;
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
@@ -602,79 +613,80 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-
-def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
- MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat(mnemonic, "{b}\t",
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
+def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, OpSize, LOCK;
-def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat(mnemonic, "{l}\t",
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, LOCK;
-
-def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
- ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat(mnemonic, "{b}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-
-def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat(mnemonic, "{l}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+
+def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat(mnemonic, "{l}\t",
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
-def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
}
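
The mi8 variants above exist because x86 encodes 16-, 32- and 64-bit ALU
immediates as a single sign-extended byte whenever the value fits, giving a
shorter lock-prefixed encoding. A minimal C++ sketch of that encodability
test (the helper name is illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // True when an immediate survives a sign-extend round trip through 8
    // bits, i.e. when the short "mi8" encoding applies.
    static bool fitsInSExtImm8(int64_t Imm) {
      return Imm == static_cast<int8_t>(Imm);
    }

    int main() {
      assert(fitsInSExtImm8(-1) && fitsInSExtImm8(127) && !fitsInSExtImm8(128));
    }
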
@@ -689,20 +701,21 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-
-def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
+ !strconcat(mnemonic, "{b}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+ !strconcat(mnemonic, "{w}\t$dst"),
+ [], IIC_UNARY_MEM>, OpSize, LOCK;
+def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+ !strconcat(mnemonic, "{l}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+ !strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
-def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [], IIC_UNARY_MEM>, OpSize, LOCK;
-def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
-def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
}
}
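
"Non-memory output is not used" means a fetch-and-modify whose old value is
discarded, which is exactly when the plain lock-prefixed RMW forms above
suffice. A hedged sketch of source that typically lowers this way (names are
illustrative, and the exact lowering is the compiler's choice):

    #include <atomic>

    std::atomic<int> Counter;

    // The fetched value is discarded, so the backend is free to emit a
    // plain lock-prefixed RMW (`lock incl`/`lock addl`) instead of XADD.
    void bump() { Counter.fetch_add(1, std::memory_order_relaxed); }

    int main() { bump(); }
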
@@ -714,43 +727,44 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
- def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat(mnemonic, "\t$ptr"),
- [(frag addr:$ptr)], itin>, TB, LOCK;
+ def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
+ !strconcat(mnemonic, "\t$ptr"),
+ [(frag addr:$ptr)], itin>, TB, LOCK;
}
}
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag,
InstrItinClass itin8, InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
- def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
- !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+ def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
- def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
- !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+ def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
- def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
- !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+ def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
- def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
- !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+ def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
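
The accumulator Defs/Uses lists above encode CMPXCHG's fixed-register
contract: the expected value goes in through AL/AX/EAX/RAX and the value
actually observed in memory comes back out the same way. A sketch of the
matching C++ operation (assuming the usual, but not standard-mandated,
lowering; the function name is ours):

    #include <atomic>

    // compare_exchange_strong's in/out 'Expected' argument mirrors the
    // accumulator's role in CMPXCHG.
    bool casInt(std::atomic<int> &A, int &Expected, int Desired) {
      return A.compare_exchange_strong(Expected, Desired);
    }

    int main() {
      std::atomic<int> A{1};
      int Exp = 1;
      return casInt(A, Exp, 2) ? 0 : 1;
    }
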
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
X86cas8, i64mem,
IIC_CMPX_LOCK_8B>;
}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem,
IIC_CMPX_LOCK_16B>, REX_W;
@@ -763,34 +777,35 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag,
InstrItinClass itin8, InstrItinClass itin> {
- let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
- def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin8>;
- def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin8>;
+ def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
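
The tied "$val = $dst" constraint above models XADD: one register both
supplies the addend and receives the prior memory contents. A short sketch,
names ours:

    #include <atomic>

    // The register operand supplies the addend and, being tied to the
    // destination, receives the value memory held beforehand.
    int fetchThenAdd(std::atomic<int> &A, int V) {
      return A.fetch_add(V);
    }

    int main() {
      std::atomic<int> A{40};
      return fetchThenAdd(A, 2) == 40 && A == 42 ? 0 : 1;
    }
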
@@ -985,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
// Direct PC relative function call for small code model. 32-bit displacement
@@ -1076,12 +1088,14 @@ def : Pat<(X86cmp GR64:$src1, 0),
// inverted.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
Instruction Inst64> {
- def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
- (Inst16 GR16:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
- (Inst32 GR32:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
- (Inst64 GR64:$src2, addr:$src1)>;
+ let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+ }
}
defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
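
These patterns rely on cmov moving only when the condition holds, with
memory allowed only as the source; folding the load therefore means
inverting the condition and swapping the operands, as the defm above does
for B/AE. A scalar illustration (function name is ours):

    // CMOVcc moves only when cc holds, so
    //   x = cc ? r : *p
    // is matched with the inverted condition, x = r; if (!cc) x = *p;,
    // keeping the load on the foldable operand.
    int selectRegOrMem(bool cc, int r, const int *p) {
      return cc ? r : *p;
    }

    int main() {
      int v = 7;
      return selectRegOrMem(false, 1, &v) == 7 ? 0 : 1;
    }
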
@@ -1185,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
+// Try this before selecting to OR.
+let AddedComplexity = 5, SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1232,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo,
[(set GR64:$dst, (or_is_add GR64:$src1,
i64immSExt32:$src2))]>;
}
-} // AddedComplexity
+} // AddedComplexity, SchedRW
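
or_is_add is sound because disjoint set bits generate no carries, so OR and
ADD compute the same value; ADD is preferred here since it is convertible to
three-address form via LEA. A compilable check of the disjointness condition
(helper name is illustrative):

    #include <cassert>
    #include <cstdint>

    // When no bit is set in both operands there is no carry, so OR == ADD.
    bool orIsAdd(uint64_t A, uint64_t B) { return (A & B) == 0; }

    int main() {
      uint64_t A = 0xF0, B = 0x0F;
      assert(orIsAdd(A, B) && (A | B) == (A + B));
    }
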
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index bfe954114c55..0e696513d47c 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -20,7 +20,7 @@
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
+ hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
@@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
}
// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
@@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
}
// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
IIC_Jcc>;
@@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcxz/jecxz/jrcxz instructions.
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address-size-prefixed form is jcxz and the unprefixed
// form is jecxz.
@@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in {
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>,
+ Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In32BitMode]>, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
+ Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, Sched<[WriteJump]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJump]>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
}
// Loop instructions
-
+let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
+}
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -152,27 +162,32 @@ let isCall = 1 in
let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
- "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
+ "call{l}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
- "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
- Requires<[In32BitMode]>;
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In32BitMode,FavorMemIndirectCall]>,
+ Sched<[WriteJumpLd]>;
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>, OpSize;
+ IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>;
+ IIC_CALL_FAR_PTR>, Sched<[WriteJump]>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
// callw for 16 bit code for the assembler.
let isAsmParserOnly = 1 in
@@ -185,7 +200,7 @@ let isCall = 1 in
// Tail call pseudo-instructions.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>;
@@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
-let isCall = 1, Uses = [RSP] in {
+let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
@@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in {
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
IIC_CALL_MEM>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode,FavorMemIndirectCall]>;
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
"lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
@@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", [], IIC_CALL_RI>,
- Requires<[IsWin64]>;
+ Requires<[IsWin64]>, Sched<[WriteJump]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Uses = [RSP],
- usesCustomInserter = 1 in {
+ isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
+ SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded21b..6dc7175357b3 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// FIXME: These should be Pat patterns.
let isCodeGenOnly = 1 in {
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB;
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>;
+ IIC_MOVZX>, Sched<[WriteALULd]>;
}
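
All of the explicit-zext fallbacks above lean on the x86-64 rule that
writing a 32-bit register clears bits 63:32 of the full register, so a plain
32-bit move is already a zero extension. A compilable sketch (the function
name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Compiles to a single 32-bit mov; the upper half is zeroed implicitly,
    // which is why no movzlq instruction is needed.
    uint64_t zext32(uint32_t X) { return X; }

    int main() { assert(zext32(0xFFFFFFFFu) == 0xFFFFFFFFull); }
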
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 959d91a9ab6b..7759a8a2dabb 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm<opc213,
- !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ !strconcat(OpcodeStr, "213", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
- !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ !strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
defm r231 : fma3p_rm<opc231,
- !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ !strconcat(OpcodeStr, "231", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
} // neverHasSideEffects = 1
}
@@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
let neverHasSideEffects = 1 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpVT, mem_frag>;
}
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpVT, mem_frag, OpNode>,
- fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, mem_cpat, Int, RC>;
}
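
For reference, the 132/213/231 suffixes pick which numbered operands feed
the multiply and which one is added, so that each of the three sources can
be the operand overwritten; this follows the standard x86 FMA3 numbering. A
scalar model with an illustrative name:

    #include <cmath>

    // With operands (a, b, c) numbered (1, 2, 3):
    //   fma132: a = a*c + b;   fma213: a = b*a + c;   fma231: a = b*c + a.
    double fma213(double a, double b, double c) { return std::fma(b, a, c); }

    int main() { return fma213(2.0, 3.0, 4.0) == 10.0 ? 0 : 1; }
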
@@ -220,7 +220,7 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -294,7 +294,7 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -307,8 +307,6 @@ let isCodeGenOnly = 1 in {
} // isCodeGenOnly = 1
}
-let Predicates = [HasFMA4] in {
-
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
int_x86_fma_vfmadd_ss>;
@@ -338,29 +336,33 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
int_x86_fma_vfnmsub_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-} // HasFMA4
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+}
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 568726e08ece..2224a08d59f4 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -422,7 +422,7 @@ def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
}
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
IIC_FLD>;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
@@ -436,7 +436,7 @@ def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
IIC_FILD>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
IIC_FST>;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
@@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
IIC_FST>;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
@@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
}
// FP Stack manipulation instructions.
+let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
IIC_FLD>, D9;
def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
@@ -499,6 +500,7 @@ def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
IIC_FST>, DD;
def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
IIC_FXCH>, D9;
+}
// Floating point constant loads.
let isReMaterializable = 1 in {
@@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
+let SchedRW = [WriteZero] in {
def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
-
+}
// Floating point compares.
+let SchedRW = [WriteFAdd] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // SchedRW
} // Defs = [FPSW]
+let SchedRW = [WriteFAdd] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
@@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
"fcompi\t$reg", IIC_FCOMI>, DF;
}
+} // SchedRW
// Floating point flag ops.
+let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
(outs), (ins), "fnstsw %ax",
@@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
-
+} // SchedRW
let mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
- (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>;
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>,
+ Sched<[WriteLoad]>;
// FPU control instructions
+let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in
def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg", IIC_FFREE>, DD;
-
// Clear exceptions
let Defs = [FPSW] in
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
+} // SchedRW
// Operandless floating-point instructions for the disassembler.
+let SchedRW = [WriteMicrocoded] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
@@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 268e9fc9c017..0ef9491eb7fc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -45,14 +45,15 @@ def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
def MRM_D4 : Format<47>;
def MRM_D5 : Format<48>;
-def MRM_D8 : Format<49>;
-def MRM_D9 : Format<50>;
-def MRM_DA : Format<51>;
-def MRM_DB : Format<52>;
-def MRM_DC : Format<53>;
-def MRM_DD : Format<54>;
-def MRM_DE : Format<55>;
-def MRM_DF : Format<56>;
+def MRM_D6 : Format<49>;
+def MRM_D8 : Format<50>;
+def MRM_D9 : Format<51>;
+def MRM_DA : Format<52>;
+def MRM_DB : Format<53>;
+def MRM_DC : Format<54>;
+def MRM_DD : Format<55>;
+def MRM_DE : Format<56>;
+def MRM_DF : Format<57>;
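
The renumbering above is mechanical: Format values are a private
enumeration, so inserting MRM_D6 in sequence shifts every later encoding by
one without external effect. Roughly:

    // Illustrative only: inserting a new enumerator renumbers its
    // successors; nothing outside the generated tables sees the values.
    enum Format { MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, MRM_D9 = 51 /* ... */ };

    int main() { return MRM_D8 == 50 ? 0 : 1; }
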
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -208,47 +209,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern>
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -257,12 +258,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, [], itin> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
let FPForm = fp;
let Pattern = pattern;
@@ -275,14 +276,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -292,7 +293,7 @@ def __xs : XS;
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -303,7 +304,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SIi8 - SSE 1 & 2 scalar instructions
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -350,25 +351,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// VPSI - SSE1 instructions with TB prefix in AVX form.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
@@ -388,42 +389,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
@@ -433,15 +434,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE3]>;
@@ -458,19 +459,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
@@ -480,11 +481,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE41]>;
@@ -492,19 +493,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used because SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE42]>;
@@ -514,11 +515,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX8I - AVX instructions with T8 and OpSize prefix.
// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX]>;
@@ -528,11 +529,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX28I - AVX2 instructions with T8 and OpSize prefix.
// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX2]>;
@@ -541,53 +542,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, Requires<[HasPCLMUL]>;
class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
- OpSize, VEX_4V, Requires<[HasFMA]>;
+ OpSize, VEX_4V, FMASC, Requires<[HasFMA]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP9, Requires<[HasXOP]>;
// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
@@ -595,33 +596,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
let Pattern = pattern;
let CodeSize = 3;
}
class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
// MMX Instruction templates
@@ -635,23 +636,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 73ba0011df1b..2a72fb6f7b2a 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>]>;
+def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
+def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
+def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
+def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
+
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -128,6 +133,7 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
@@ -154,7 +160,7 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
-def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -187,9 +193,7 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5a99ff004d48..7ba542c87520 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -17,15 +17,15 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -297,7 +297,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
{ X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
{ X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
@@ -355,7 +355,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
// AVX 128-bit versions of foldable instructions
- { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -467,9 +467,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::RSQRTSSr, X86::RSQRTSSm, 0 },
{ X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
{ X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
{ X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
{ X86::SQRTSDr, X86::SQRTSDm, 0 },
{ X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
{ X86::SQRTSSr, X86::SQRTSSm, 0 },
@@ -510,27 +508,25 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
- { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
- { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
- { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
- { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
- { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
- { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
- { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
- { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
- { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
- { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
- { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
- { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
- { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
- { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
- { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
- { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
- { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VRCPPSr, X86::VRCPPSm, 0 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
{ X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
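Background on the flag churn in this table: the third column is a bitfield, defined in X86InstrInfo.h, whose TB_ALIGN_* bits record the minimum alignment a folded memory operand must have. Legacy SSE instructions fault on unaligned 16-byte operands, but the VEX-encoded AVX forms tolerate unaligned memory, so their entries can drop the requirement. A hedged sketch of the gate this field feeds at fold time, using the Flags/Align values visible in foldMemoryOperandImpl:

    // Sketch only, mirroring the alignment gate in
    // X86InstrInfo::foldMemoryOperandImpl.
    static bool foldAllowedForAlignment(unsigned Flags, unsigned Align) {
      // TB_ALIGN_16 stores the byte count shifted by TB_ALIGN_SHIFT, so this
      // recovers 16 for TB_ALIGN_16 and 0 for entries with no requirement.
      unsigned MinAlign = (Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
      return Align >= MinAlign; // a zero-flag entry folds at any alignment
    }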
@@ -541,28 +537,41 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
- { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
// AVX2 foldable instructions
- { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
- { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
- { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
- { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
- { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
- { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
- { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- // BMI/BMI2 foldable instructions
+ // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+ { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BLSI32rr, X86::BLSI32rm, 0 },
+ { X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+ { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+ { X86::BLSR32rr, X86::BLSR32rm, 0 },
+ { X86::BLSR64rr, X86::BLSR64rm, 0 },
+ { X86::BZHI32rr, X86::BZHI32rm, 0 },
+ { X86::BZHI64rr, X86::BZHI64rm, 0 },
+ { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+ { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+ { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+ { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+ { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+ { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
{ X86::RORX32ri, X86::RORX32mi, 0 },
{ X86::RORX64ri, X86::RORX64mi, 0 },
{ X86::SARX32rr, X86::SARX32rm, 0 },
@@ -571,6 +580,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+ { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+ { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
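The loop body is elided by the hunk; it registers every pair in the reg-to-mem and mem-to-reg folding maps. Reconstructed from the surrounding file, approximately:

    // Each OpTbl1 entry feeds both DenseMaps: RegOp2MemOpTable1 answers
    // "fold this load", MemOp2RegOpTable answers "unfold it again".
    unsigned RegOp = OpTbl1[i].RegOp;
    unsigned MemOp = OpTbl1[i].MemOp;
    unsigned Flags = OpTbl1[i].Flags;
    AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
                  RegOp, MemOp,
                  // Index 1, folded load
                  Flags | TB_INDEX_1 | TB_FOLDED_LOAD);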
@@ -691,21 +703,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
- { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
- { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@@ -756,6 +760,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
+ { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
+ { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@@ -827,31 +839,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
- { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
- { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
- { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
- { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
- { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
- { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
- { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
- { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
- { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
- { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
- { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
- { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
+ { X86::VANDPDrr, X86::VANDPDrm, 0 },
+ { X86::VANDPSrr, X86::VANDPSrm, 0 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
- { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
- { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
{ X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
@@ -862,263 +874,267 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
{ X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
{ X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
- { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
- { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
- { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
- { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
{ X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
{ X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
- { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
- { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
- { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
- { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
- { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
- { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
- { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
- { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
- { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
- { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
- { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPSrr, X86::VMULPSrm, 0 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
- { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
- { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
- { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
- { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
- { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
- { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
- { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
- { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
- { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
- { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
- { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
- { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
- { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
- { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
- { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
- { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
- { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
- { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
- { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
- { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
- { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
- { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
- { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
- { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
- { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
- { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
- { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
- { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
- { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
- { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
- { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
- { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
- { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
- { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
- { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
- { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
- { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
- { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
- { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
- { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
- { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
- { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
- { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
- { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
- { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
- { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
- { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
- { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
- { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
- { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
- { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
- { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
- { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
- { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
- { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
- { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
- { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
- { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
- { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
- { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
- { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
- { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
- { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
- { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
- { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
- { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
- { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
- { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
- { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
- { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
- { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
- { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
- { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
- { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VORPDrr, X86::VORPDrm, 0 },
+ { X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
+ { X86::VPADDBrr, X86::VPADDBrm, 0 },
+ { X86::VPADDDrr, X86::VPADDDrm, 0 },
+ { X86::VPADDQrr, X86::VPADDQrm, 0 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
+ { X86::VPADDWrr, X86::VPADDWrm, 0 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
+ { X86::VPANDNrr, X86::VPANDNrm, 0 },
+ { X86::VPANDrr, X86::VPANDrm, 0 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
+ { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
+ { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
+ { X86::VPORrr, X86::VPORrm, 0 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
+ { X86::VPSRADrr, X86::VPSRADrm, 0 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
+ { X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
- { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
+ { X86::VXORPDrr, X86::VXORPDrm, 0 },
+ { X86::VXORPSrr, X86::VXORPSrm, 0 },
// AVX 256-bit foldable instructions
- { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
- { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
- { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
- { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
- { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
- { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
- { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
- { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
- { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
- { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
- { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
- { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
- { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
- { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
- { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
- { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
- { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
- { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
- { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
- { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
- { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
- { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
- { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
- { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
- { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
- { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
- { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
- { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
- { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
- { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
- { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
- { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
- { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
- { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
- { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
- { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
- { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
- { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
- { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
- { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
- { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
+ { X86::VORPDYrr, X86::VORPDYrm, 0 },
+ { X86::VORPSYrr, X86::VORPSYrm, 0 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
// AVX2 foldable instructions
- { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
- { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
- { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
- { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
- { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
- { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
- { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
- { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
- { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
- { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
- { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
- { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
- { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
- { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 },
- { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
- { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 },
- { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
- { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
- { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
- { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
- { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
- { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
- { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
- { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
- { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
- { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
- { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
- { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
- { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
- { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
- { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
- { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
- { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
- { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
- { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
- { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
- { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
- { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
- { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
- { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
- { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
- { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
+ { X86::VPANDYrr, X86::VPANDYrm, 0 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
+ { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
+ { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
+ { X86::VPORYrr, X86::VPORYrm, 0 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
+ { X86::VPXORYrr, X86::VPXORYrm, 0 },
// FIXME: add AVX 256-bit foldable instructions
// FMA4 foldable patterns
@@ -1156,8 +1172,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
// BMI/BMI2 foldable instructions
+ { X86::ANDN32rr, X86::ANDN32rm, 0 },
+ { X86::ANDN64rr, X86::ANDN64rm, 0 },
{ X86::MULX32rr, X86::MULX32rm, 0 },
{ X86::MULX64rr, X86::MULX64rm, 0 },
+ { X86::PDEP32rr, X86::PDEP32rm, 0 },
+ { X86::PDEP64rr, X86::PDEP64rm, 0 },
+ { X86::PEXT32rr, X86::PEXT32rm, 0 },
+ { X86::PEXT64rr, X86::PEXT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1551,16 +1573,19 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::MOVDQUrm:
case X86::VMOVSSrm:
case X86::VMOVSDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::FsVMOVAPSrm:
@@ -2159,7 +2184,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
MI->setDesc(get(Opc));
MI->getOperand(3).setImm(Size-Amt);
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
@@ -2238,7 +2263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Fallthrough intended.
}
default:
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
}
@@ -2840,6 +2865,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+  // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
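The hunk cuts the GR64 arm short; the matching pop (unchanged by this patch) lands the flags in the destination register, roughly:

    if (X86::GR64RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF64));         // push RFLAGS
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); // pop into DestReg
      return;
    }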
@@ -3149,19 +3176,15 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
- case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m:
+ case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
case X86::DEC64_32r: case X86::DEC64_16r:
- case X86::DEC64_32m: case X86::DEC64_16m:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
- case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m:
+ case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
case X86::INC64_32r: case X86::INC64_16r:
- case X86::INC64_32m: case X86::INC64_16m:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@@ -3177,6 +3200,8 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ case X86::ANDN32rr: case X86::ANDN32rm:
+ case X86::ANDN64rr: case X86::ANDN64rm:
return true;
}
}
@@ -3499,43 +3524,44 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
/// to:
/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
///
-static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MI->getOperand(0).getReg();
- MI->setDesc(Desc);
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
// implicit operands.
- MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ assert(MIB->getOperand(1).getReg() == Reg &&
+ MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
return true;
}
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {
case X86::SETB_C8r:
- return Expand2AddrUndef(MI, get(X86::SBB8rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
- return Expand2AddrUndef(MI, get(X86::SBB16rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
- return Expand2AddrUndef(MI, get(X86::SBB32rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
- return Expand2AddrUndef(MI, get(X86::SBB64rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB64rr));
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
- return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
+ return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::V_SETALLONES:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
- return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr));
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
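Taken together with the doc comment above, the refactored helper is used like this; the undef flags are what make the expansion a dependency-breaking zero idiom:

    // E.g. expanding the V_SET0 pseudo at MI (HasAVX picks the VEX form):
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
    // Result: %xmm4 = VXORPSrr undef %xmm4, undef %xmm4 -- both inputs are
    // RegState::Undef, so no dependence on %xmm4's old value is created.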
@@ -3561,9 +3587,10 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
unsigned NumAddrOps = MOs.size();
for (unsigned i = 0; i != NumAddrOps; ++i)
MIB.addOperand(MOs[i]);
@@ -3587,9 +3614,10 @@ static MachineInstr *FuseInst(MachineFunction &MF,
unsigned Opcode, unsigned OpNo,
const SmallVectorImpl<MachineOperand> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -3627,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
bool isTwoAddrFold = false;
+
+  // Atom favors the register form of calls, so do not fold loads into calls
+  // when the subtarget is Atom.
+ if (isCallRegIndirect &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+ return NULL;
+ }
+
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -3836,8 +3873,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
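For reference, the attribute query this and the following hunk migrate to: function-level attributes are now looked up through AttributeSet::FunctionIndex.

    bool OptForSize = MF.getFunction()->getAttributes()
        .hasAttribute(AttributeSet::FunctionIndex,
                      Attribute::OptimizeForSize);
    if (!OptForSize && hasPartialRegUpdate(MI->getOpcode()))
      return 0; // folding here would create a partial-register-update stall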
@@ -3878,8 +3915,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3982,6 +4019,21 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
default: {
+ if ((LoadMI->getOpcode() == X86::MOVSSrm ||
+ LoadMI->getOpcode() == X86::VMOVSSrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 4)
+      // These instructions only load 32 bits; we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return NULL;
+ if ((LoadMI->getOpcode() == X86::MOVSDrm ||
+ LoadMI->getOpcode() == X86::VMOVSDrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 8)
+      // These instructions only load 64 bits; we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return NULL;
+
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
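The rationale for the new guards: folding rewrites the consumer into its memory form, which reads a full-width operand from the address, so a 4- or 8-byte scalar load folded into a wider register's use would read bytes the program never loaded and could even fault past the original access. The same checks, restated with the repeated opcode fetch hoisted:

    // Sketch: reject folding a narrow scalar load into a wider consumer.
    unsigned LoadOpc = LoadMI->getOpcode();
    unsigned RegSize =
        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize();
    if ((LoadOpc == X86::MOVSSrm || LoadOpc == X86::VMOVSSrm) && RegSize > 4)
      return NULL; // 32-bit load, wider destination: folded form over-reads
    if ((LoadOpc == X86::MOVSDrm || LoadOpc == X86::VMOVSDrm) && RegSize > 8)
      return NULL; // 64-bit load, wider destination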
@@ -4049,7 +4101,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
return true;
- return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+ return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -4114,7 +4166,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(DataMI);
+ MachineInstrBuilder MIB(MF, DataMI);
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
@@ -4620,13 +4672,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::DIVSSrr:
case X86::DIVSSrr_Int:
case X86::SQRTPDm:
- case X86::SQRTPDm_Int:
case X86::SQRTPDr:
- case X86::SQRTPDr_Int:
case X86::SQRTPSm:
- case X86::SQRTPSm_Int:
case X86::SQRTPSr:
- case X86::SQRTPSr_Int:
case X86::SQRTSDm:
case X86::SQRTSDm_Int:
case X86::SQRTSDr:
@@ -4645,13 +4693,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VDIVSSrr:
case X86::VDIVSSrr_Int:
case X86::VSQRTPDm:
- case X86::VSQRTPDm_Int:
case X86::VSQRTPDr:
- case X86::VSQRTPDr_Int:
case X86::VSQRTPSm:
- case X86::VSQRTPSm_Int:
case X86::VSQRTPSr:
- case X86::VSQRTPSr_Int:
case X86::VSQRTSDm:
case X86::VSQRTSDm_Int:
case X86::VSQRTSDr:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 650fa95d7f23..ccc1aa2e35a5 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -142,6 +142,9 @@ def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -247,9 +250,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
-def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>;
-def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
-def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>;
+def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
+def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
+def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -525,6 +528,13 @@ def lea64_32mem : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
+// Memory operands that use 64-bit pointers in both ILP32 and LP64.
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -535,6 +545,12 @@ def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
+// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
+def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or,
+ frameindex, X86WrapperRIP],
+ []>;
+
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -590,13 +606,19 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasHLE : Predicate<"Subtarget->hasHLE()">;
+def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
+def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Mode64Bit">;
+ AssemblerPredicate<"!Mode64Bit", "32-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Mode64Bit">;
+ AssemblerPredicate<"Mode64Bit", "64-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
@@ -612,6 +634,7 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -744,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
//
// Nop
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
"nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
@@ -755,8 +778,9 @@ let neverHasSideEffects = 1 in {
// Constructing a stack frame.
def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", [], IIC_ENTER>;
+ "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
+let SchedRW = [WriteALU] in {
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
@@ -766,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize;
def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -789,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -818,29 +843,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
-}
+} // mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>;
def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>;
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -851,25 +877,24 @@ def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>, Sched<[WriteLoad]>;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
- Requires<[In64BitMode]>;
-
-
+ Requires<[In64BitMode]>, Sched<[WriteStore]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad=1, neverHasSideEffects=1 in {
-def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>,
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore=1, neverHasSideEffects=1 in {
-def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>,
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
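The two idioms in these hunks attach the same information: a `let SchedRW = [...] in { ... }` block sets a default scheduling class on every def it encloses, while the `Sched<[...]>` mixin sets it on a single instruction. A minimal, self-contained sketch of both forms (all record names hypothetical, stand-ins for the real TargetSchedule.td machinery):

// Stand-ins for SchedWrite/Sched from TargetSchedule.td.
class SchedWrite;
def WriteLoad  : SchedWrite;
def WriteStore : SchedWrite;
class Inst { list<SchedWrite> SchedRW = []; }
class Sched<list<SchedWrite> schedrw> { list<SchedWrite> SchedRW = schedrw; }

// Block form: every def inside inherits SchedRW = [WriteStore].
let SchedRW = [WriteStore] in {
  def HypoPush    : Inst;
  def HypoPushImm : Inst;
}
// Mixin form: overrides the field on one def only.
def HypoPopf : Inst, Sched<[WriteLoad]>;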
-let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -878,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm,
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ Sched<[WriteShift]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShift]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteShift]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
IIC_BSR>, TB,
- OpSize;
+ OpSize, Sched<[WriteShiftLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]
-
+let SchedRW = [WriteMicrocoded] in {
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
@@ -959,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -975,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -992,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
+} // SchedRW
+let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1005,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
/// absolute 32-bit offset (from the segment base, not PC-relative). These
/// are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1026,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
// FIXME: These definitions are utterly broken
// Just leave them commented out for now because they're useless outside
@@ -1043,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1054,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1069,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
[(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
+let SchedRW = [WriteStore] in {
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1081,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1089,34 +1125,37 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
}
// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Bit test instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1127,31 +1166,35 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi16 addr:$src1), GR16:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, OpSize, TB, Requires<[FastBTMem]>;
-def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi32 addr:$src1), GR32:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB, Requires<[FastBTMem]>;
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi64 addr:$src1), GR64:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB;
-
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi16 addr:$src1), GR16:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, OpSize, TB, Requires<[FastBTMem]>;
+ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi32 addr:$src1), GR32:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB, Requires<[FastBTMem]>;
+ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi64 addr:$src1), GR64:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1164,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
IIC_BT_RI>, TB;
+} // SchedRW
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1180,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
-
+let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1189,6 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1196,6 +1246,9 @@ def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1203,6 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1210,7 +1266,9 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+let SchedRW = [WriteALU] in {
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1218,6 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1225,6 +1286,9 @@ def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1232,6 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1239,7 +1306,9 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+let SchedRW = [WriteALU] in {
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1247,6 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1254,6 +1326,9 @@ def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1261,6 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1268,6 +1346,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+} // hasSideEffects = 0
} // Defs = [EFLAGS]
@@ -1279,41 +1359,42 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
// operand is referenced, the atomicity is ensured.
multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
InstrItinClass itin> {
- let Constraints = "$val = $dst" in {
- def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin>;
- def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin>;
+ def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
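This hunk also migrates the deprecated `#NAME#` spelling to the current `NAME#` paste operator; the records produced are unchanged. A self-contained sketch of how NAME pastes with the defm'd name (hypothetical records):

class Mnem<string m> { string Mnemonic = m; }
multiclass Widths<string m> {
  def NAME#8rm  : Mnem<m # "b">;  // with the defm below: SWAPSKETCH8rm
  def NAME#16rm : Mnem<m # "w">;  // with the defm below: SWAPSKETCH16rm
}
defm SWAPSKETCH : Widths<"xchg">;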
// Swap between registers.
+let SchedRW = [WriteALU] in {
let Constraints = "$val = $dst" in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
@@ -1338,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
+} // SchedRW
-
-
+let SchedRW = [WriteALU] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1350,8 +1431,9 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
@@ -1364,6 +1446,7 @@ def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
+let SchedRW = [WriteALU] in {
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG8>, TB;
@@ -1376,7 +1459,9 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG>, TB;
+} // SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
@@ -1400,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
TB, Requires<[HasCmpxchg16b]>;
-
+} // SchedRW
// Lock instruction prefix
@@ -1423,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
// String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
+}
+let SchedRW = [WriteSystem] in {
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
-
+}
// Flag instructions
+let SchedRW = [WriteALU] in {
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
@@ -1443,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
// Table lookup instructions
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
+ Sched<[WriteLoad]>;
+let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1476,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[In32BitMode]>;
+} // SchedRW
+let SchedRW = [WriteSystem] in {
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1486,17 +1580,19 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
Requires<[In32BitMode]>;
// Adjust RPL Field of Segment Selector
-def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
Requires<[In32BitMode]>;
-def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1509,6 +1605,8 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
T8;
+ }
+ let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1521,6 +1619,7 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
T8;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1539,6 +1638,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
}
//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS] in {
+ def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins),
+ "rdseed{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB;
+ def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+ "rdseed{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB;
+ def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins),
+ "rdseed{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
// LZCNT Instruction
//
let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
@@ -1605,26 +1719,26 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
PatFrag ld_frag> {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+ [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V;
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+ [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>,
T8, VEX_4V;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
- X86blsr_flag, loadi32>;
+ X86blsr, loadi32>;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
- X86blsr_flag, loadi64>, VEX_W;
+ X86blsr, loadi64>, VEX_W;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
- X86blsmsk_flag, loadi32>;
+ X86blsmsk, loadi32>;
defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
- X86blsmsk_flag, loadi64>, VEX_W;
+ X86blsmsk, loadi64>, VEX_W;
defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
- X86blsi_flag, loadi32>;
+ X86blsi, loadi32>;
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
- X86blsi_flag, loadi64>, VEX_W;
+ X86blsi, loadi64>, VEX_W;
}
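The pattern rewrite here trades the dedicated two-result flag nodes (X86blsr_flag and friends) for the plain one-result nodes plus an (implicit EFLAGS) annotation, so the flags def now comes from the instruction's implicit-def list rather than a second SDNode result. Side by side:

// Before: the node itself yielded both the value and EFLAGS.
//   [(set GR32:$dst, EFLAGS, (X86blsr_flag GR32:$src))]
// After: one-result node; EFLAGS matched as an implicit def.
//   [(set GR32:$dst, (X86blsr GR32:$src)), (implicit EFLAGS)]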
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -1886,6 +2000,8 @@ def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcom", (COM_FST0r ST1)>;
+def : InstAlias<"fcomp", (COMP_FST0r ST1)>;
def : InstAlias<"fcomi", (COM_FIr ST1)>;
def : InstAlias<"fcompi", (COM_FIPr ST1)>;
def : InstAlias<"fucom", (UCOM_Fr ST1)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 127af6f7f93a..49721df7c118 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -20,6 +20,7 @@
// MMX Multiclasses
//===----------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def MMX_INTALU_ITINS : OpndItins<
IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
>;
@@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins<
def MMX_PHADDSUBD : OpndItins<
IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
>;
+}
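These wrappers populate the Sched field that the X86InstrSSE.td hunk below adds to OpndItins (that file defines the class, which this one shares). The equivalent explicit spelling, sketched with a hypothetical record name:

def MMX_SKETCH_ITINS : OpndItins<IIC_MMX_ALU_RR, IIC_MMX_ALU_RM> {
  let Sched = WriteVecALU;  // same effect as the enclosing 'let Sched' block
}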
+let Sched = WriteVecIMul in
def MMX_PMUL_ITINS : OpndItins<
IIC_MMX_PMUL, IIC_MMX_PMUL
>;
+let Sched = WriteVecALU in {
def MMX_PSADBW_ITINS : OpndItins<
IIC_MMX_PSADBW, IIC_MMX_PSADBW
>;
@@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins<
def MMX_MISC_FUNC_ITINS : OpndItins<
IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
>;
+}
def MMX_SHIFT_ITINS : ShiftOpndItins<
IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
>;
+let Sched = WriteShuffle in {
def MMX_UNPCK_H_ITINS : OpndItins<
IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
>;
@@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins<
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
+} // Sched
+let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
>;
@@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins<
def MMX_CVT_PS_ITINS : OpndItins<
IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
>;
+}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
@@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> {
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
@@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[WriteVecShift]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>;
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
}
@@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (memopmmx addr:$src))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
@@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
- (bitconvert (memopmmx addr:$src2))))], itins.rm>;
+ (bitconvert (memopmmx addr:$src2))))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>,
+ Sched<[itins.Sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
PatFrag ld_frag, string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
}
//===----------------------------------------------------------------------===//
@@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
+ Sched<[WriteStore]>;
// Low word of MMX to GPR.
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
@@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
- (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
+ (MMX_X86movd2w (x86mmx VR64:$src)))],
+ IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- [], IIC_MMX_MOV_MM_RM>;
+ [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
+let SchedRW = [WriteMove] in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
+} // SchedRW
+
+let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
+} // SchedRW
+let SchedRW = [WriteMove] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
@@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
+} // SchedRW
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
- IIC_MMX_MOVQ_RM>;
+ IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
let AddedComplexity = 15 in
// movd to MMX register zero-extends
@@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
@@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
@@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))],
- IIC_MMX_PEXTR>;
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
@@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32:$src2, (iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
(iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Mask creation
@@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
+let SchedRW = [WriteShuffle] in {
let Uses = [EDI] in
def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
@@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
IIC_MMX_MASKMOV>;
+}
// 64-bit bit convert.
let Predicates = [HasSSE2] in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f48d7ed7fe1..384238741b18 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
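The new field defaults to WriteFAdd and is typed X86FoldableSchedWrite: judging by the itins.Sched.Folded uses further down, that is a SchedWrite carrying a .Folded member naming its load-folded variant (the real definition lives in X86Schedule.td). A hedged sketch of what the type presumably looks like:

class X86FoldableSchedWrite : SchedWrite {
  // Assumed member, inferred from the uses in this patch: the write to use
  // when a load is folded into the instruction.
  SchedWrite Folded;
}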
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
// scalar
+let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
+}
def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
>;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
>;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
>;
// parallel
+let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
def SSE_ALU_F64P : OpndItins<
IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
+}
def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
>;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
>;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
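This multiclass establishes the convention the rest of the file follows: the register form takes the write directly, and the folded-load form takes its .Folded twin plus ReadAfterLd, which models that the register source is not read until the load resolves:

// rr:  Sched<[itins.Sched]>
// rm:  Sched<[itins.Sched.Folded, ReadAfterLd]>  // $src1 read after the load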
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>;
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,54 +210,36 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1,
- bit rr_hasSideEffects = 0> {
- let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
+ bit Is2Addr = 1> {
+ let isCommutable = 1, hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, NoItinerary, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
-}
-
-/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
-multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- string asm, string SSEVer, string FPSizeStr,
- X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, OpndItins itins, bit Is2Addr = 1> {
- def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
- def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
+ pat_rm, NoItinerary, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -367,7 +370,7 @@ let Predicates = [HasAVX] in {
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -384,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
@@ -401,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX] in {
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
@@ -439,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
@@ -458,93 +461,70 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
- SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set VR128:$dst, (vt (OpNode VR128:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string base_opc,
+ string asm_opr> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
-// Loading from memory automatically zeroing upper bits.
-class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> :
- SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
-
-// AVX
-def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
- VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
- VEX_LIG;
-
-// For the disassembler
-let isCodeGenOnly = 1 in {
- def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, VEX_4V, VEX_LIG;
- def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, VEX_4V, VEX_LIG;
+ // For the disassembler
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
- VEX_LIG;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
- VEX_LIG;
-}
+multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string OpcodeStr> {
+ // AVX
+ defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ VEX_4V, VEX_LIG;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XS, VEX, VEX_LIG;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XD, VEX, VEX_LIG;
+ def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ VEX, VEX_LIG, Sched<[WriteStore]>;
+ // SSE1 & 2
+ let Constraints = "$src1 = $dst" in {
+ defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}">;
+ }
-// SSE1 & 2
-let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+ def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
+}
- // For the disassembler
- let isCodeGenOnly = 1 in {
- def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XS;
- def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XD;
- }
+// Loading from memory automatically zeroing upper bits.
+multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
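Per the multiclasses above, each defm fans out to the same six records the deleted defs spelled by hand; for MOVSS:

//   VMOVSSrr, VMOVSSrr_REV, VMOVSSmr   (V#NAME forms, VEX-encoded)
//   MOVSSrr,  MOVSSrr_REV,  MOVSSmr    (NAME forms, $src1 = $dst constrained)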
+
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
let AddedComplexity = 20 in
- def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-
// Patterns
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
@@ -791,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -836,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -868,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -926,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -942,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1055,7 +1041,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1090,7 +1076,7 @@ let Predicates = [UseSSE1] in {
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>, VEX;
@@ -1107,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
@@ -1132,36 +1118,46 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- SDNode psnode, SDNode pdnode, string base_opc,
- string asm_opr, InstrItinClass itin> {
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [(set RC:$dst,
- (psnode RC:$src1,
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (pdnode RC:$src1,
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+
}
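// In the Sched<[WriteShuffleLd, ReadAfterLd]> lists above, WriteShuffleLd
// covers the load-folded shuffle result in $dst, while ReadAfterLd marks
// $src1 as a register read whose value is not needed until the folded load
// has completed.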
-let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1182,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1257,16 +1254,11 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
-}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0, so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1291,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1341,14 +1334,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1356,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1397,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
@@ -1422,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
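// itins.Sched is the SchedWrite attached to each OpndItins definition above
// (e.g. WriteCvtF2I for SSE_CVT_SD2SI), and itins.Sched.Folded selects the
// corresponding load-folded write class used by the rm variants.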
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1433,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1444,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}
@@ -1457,7 +1457,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1465,26 +1465,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
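+// For example, these aliases let an explicit size suffix assemble even when
+// only a memory operand is present: "vcvttss2siq (%rax), %rcx" selects
+// VCVTTSS2SI64rm, and "vcvttss2sil (%rax), %ecx" selects VCVTTSS2SIrm.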
+
// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only memory operands are used.
// Provide explicit "l" and "q" assembly forms to address this where
// appropriate.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+                (VCVTSI2SSrm FR32:$dst, FR32:$src1, i32mem:$src)>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
let Predicates = [HasAVX] in {
@@ -1511,27 +1528,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD, REX_W;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+                (CVTSI2SSrm FR32:$dst, i32mem:$src)>;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -1540,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1555,38 +1596,38 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
@@ -1596,13 +1637,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss", SSE_CVT_Scalar>, XS;
+ "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
"cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd", SSE_CVT_Scalar>, XD;
+ "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
"cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
@@ -1616,40 +1657,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ "cvttss2si", SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ "cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
@@ -1666,6 +1707,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
/// SSE 2 Only
// Convert scalar double to scalar single
@@ -1673,13 +1748,15 @@ let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1688,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))],
- IIC_SSE_CVT_Scalar_RR>;
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1715,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert scalar single to scalar double
@@ -1731,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f64 (fextend FR32:$src)),
@@ -1756,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[UseSSE2]>;
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1778,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
// Convert Packed Double FP to Packed DW Integers
@@ -1839,7 +1926,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- VEX;
+ VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1847,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
@@ -1867,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1879,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1954,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -1967,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1993,12 +2083,13 @@ let Predicates = [HasAVX] in {
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2006,32 +2097,32 @@ let Predicates = [HasAVX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB;
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB;
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2039,30 +2130,33 @@ let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX;
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
}
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
@@ -2079,7 +2173,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2088,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
// AVX 256-bit register conversion intrinsics
@@ -2165,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RR>;
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RM>;
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
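// The *_alt forms let the assembler accept a raw immediate predicate in
// place of a named condition, e.g. "cmpss $3, %xmm1, %xmm0" is equivalent
// to "cmpunordss %xmm1, %xmm0".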
@@ -2213,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2248,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Defs = [EFLAGS] in {
@@ -2310,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
@@ -2399,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2488,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2585,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2616,18 +2723,18 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L;
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L;
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2657,17 +2764,16 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit IsCommutable, bit Is2Addr> {
let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2675,44 +2781,35 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
-// These are ordered here for pattern ordering requirements with the fp versions
+multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
+ ValueType OpVT128, ValueType OpVT256,
+ OpndItins itins, bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
+ VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
-let Predicates = [HasAVX] in {
-defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
+ memopv2i64, i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
+ OpVT256, VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
-let Constraints = "$src1 = $dst" in {
-defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0>;
-} // Constraints = "$src1 = $dst"
+// These are ordered here for pattern ordering requirements with the fp versions
-let Predicates = [HasAVX2] in {
-defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
+defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
+ SSE_BIT_ITINS_P, 0>;
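+// Each PDI_binop_all instantiation above expands to the legacy SSE2 form as
+// well as the AVX and AVX2 variants; e.g. "defm PAND" yields PANDrr/PANDrm,
+// VPANDrr/VPANDrm and VPANDYrr/VPANDYrm via the NAME, V#NAME and V#NAME#Y
+// prefixes.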
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2757,6 +2854,20 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
+ defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+
+ defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V, VEX_L;
+
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
// are all promoted to v2i64, and the patterns are covered by the int
// version. This is needed in SSE only, because v2i64 isn't supported on
@@ -2764,7 +2875,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2773,6 +2884,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(memopv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
+
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -2789,31 +2901,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
-///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
- defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
-
- defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
- TB, OpSize, VEX_4V, VEX_L;
-}
-
-// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
-
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
@@ -2848,26 +2935,32 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SizeItins itins> {
+let Predicates = [HasAVX] in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
+
+ defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, VR256, v8f32, f256mem, memopv8f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
+ defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, VR256, v4f64, f256mem, memopv4f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
- TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s, 1>, TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d, 1>, TB, OpSize;
}
-
-multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- SizeItins itins> {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
- TB, VEX_L;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
- TB, OpSize, VEX_L;
}
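// With the packed forms folded into basic_sse12_fp_binop_p, a single
// "defm ADD" now covers ADDPS/ADDPD together with the VEX-encoded
// VADDPS/VADDPD and the 256-bit VADDPSY/VADDPDY variants.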
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2881,116 +2974,69 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, itins.s, Is2Addr>,
- TB;
-
- defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
- SizeItins itins> {
- defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, itins.s, 0>, TB, VEX_L;
-
- defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
+let isCodeGenOnly = 1 in {
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
}
-// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
- VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
- VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
}
let isCodeGenOnly = 1 in {
defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
}
@@ -3002,6 +3048,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant for the full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -3009,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -3017,13 +3066,36 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
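// The Sched field filled in by the two `let Sched = ...` wrappers above is
// what the rewritten unop multiclasses below consume, roughly:
//
//   Sched<[itins.Sched]>                      // register-register form
//   Sched<[itins.Sched.Folded]>               // load-folded form
//   Sched<[itins.Sched.Folded, ReadAfterLd]>  // folded form, tied input read
//
// so defs built with SSE_SQRTP/SSE_SQRTS land on WriteFSqrt and defs built
// with SSE_RCPP/SSE_RCPS land on WriteFRcp without repeating the SchedWrite
// at every def.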
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                             (ins FR32:$src1, f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3031,204 +3103,238 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
-/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+/// sse1_fp_unop_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
- def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                    (ins FR32:$src1, f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>, Sched<[itins.Sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
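// The "$src1 = $dst" constraint on the _Int defs above encodes that the
// non-VEX scalar form rewrites only the low element, so the destination must
// also be read to preserve the upper elements. The UseSSE1 patterns later in
// this patch therefore feed the same register to both operands, e.g.
//
//   def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
//             (RSQRTSSr_Int VR128:$src, VR128:$src)>;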
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
-/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
+ Intrinsic V4F32Int, Intrinsic V8F32Int,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int VR256:$src))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
-}
-
-/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
-multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
- def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))],
- itins.rr>, VEX_L;
- def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+                     (ins FR64:$src1, f64mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
-}
-
-/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-let hasSideEffects = 0 in
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in {
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
-/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
-/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))],
- itins.rr>;
- def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
- itins.rm>;
-}
-
-/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))],
- itins.rr>, VEX_L;
- def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
-let Predicates = [HasAVX] in {
- // Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
- sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
-
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
- SSE_SQRTP>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
- SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
- SSE_SQRTP>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
- SSE_SQRTP>,
- VEX;
-
- // Reciprocal approximations. Note that these typically require refinement
- // in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
- SSE_SQRTP>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTP>, VEX;
-
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
- SSE_RCPP>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
- SSE_RCPP>, VEX;
-}
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
+ int_x86_avx_rcp_ps_256, SSE_RCPP>;
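// As a numerics sketch: rsqrtps/rcpps guarantee only about 12 bits of
// relative precision, and the customary refinement is one Newton-Raphson
// step, which roughly doubles the number of correct bits:
//
//   rsqrt: x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
//   rcp:   x1 = x0 * (2.0 - a * x0)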
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
@@ -3283,59 +3389,11 @@ let Predicates = [HasAVX] in {
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
-// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
- SSE_SQRTS>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
-
-/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
-multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic F32Int, OpndItins itins> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
- let Constraints = "$src1 = $dst" in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
- }
-}
-
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTS>;
let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
-}
-
-defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
-let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
(RCPSSr_Int VR128:$src, VR128:$src)>;
}
@@ -3347,52 +3405,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3408,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3422,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
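// AddedComplexity = 400 biases instruction selection toward these
// non-temporal store patterns whenever they and an ordinary store pattern
// both match, so an alignednontemporalstore node reliably becomes a MOVNT*
// instruction.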
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
@@ -3444,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
IIC_SSE_PREFETCH>, TB;
}
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3463,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
TB, Requires<[HasSSE2]>;
+} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3474,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>;
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, VEX;
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, VEX;
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>;
+ IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>;
+ IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3492,23 +3553,23 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX, VEX_L;
-}
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
VEX, VEX_L;
+}
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3525,7 +3586,8 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3542,7 +3604,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3561,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3570,7 +3633,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -3579,8 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3592,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3604,30 +3669,23 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
XS, Requires<[UseSSE2]>;
}
-// Intrinsic forms of MOVDQU load and store
-def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, VEX, Requires<[HasAVX]>;
-
-def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, Requires<[UseSSE2]>;
-
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
(VMOVDQUYmr addr:$dst, VR256:$src)>;
}
+let Predicates = [UseSSE2] in
+def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (MOVDQUmr addr:$dst, VR128:$src)>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
@@ -3646,14 +3704,33 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ Intrinsic IntId256, OpndItins itins,
+ bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
+ VR128, memopv2i64, i128mem, itins,
+ IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
+ i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
+ VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
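// Sketch of the expansion: with NAME = PSUBSB,
//
//   defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
//                                   int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P,
//                                   0>;
//
// yields the tied-operand SSE2 defs (PSUBSB*), the VEX-encoded 128-bit
// VPSUBSB* under HasAVX, and the 256-bit VPSUBSBY* under HasAVX2, replacing
// the three hand-written instantiations removed further down.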
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -3669,23 +3746,25 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
-/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -3697,260 +3776,88 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
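// PDI_binop_rm2 serves operations whose result type differs from the source
// type; pmuludq is the user here: it multiplies the low 32-bit element of
// each 64-bit lane, so the instantiated pattern takes v4i32 (or v8i32)
// sources and produces v2i64 (or v4i64) results, e.g.
//
//   [(set VR128:$dst, (v2i64 (X86pmuludq (v4i32 VR128:$src1), VR128:$src2)))]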
-// 128-bit Integer Arithmetic
+defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 0>;
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
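// PDI_binop_all is not visible in this hunk; presumably it mirrors
// PDI_binop_all_int above but instantiates the pattern-based PDI_binop_rm
// instead of the intrinsic form, so one defm covers e.g. PADDB (SSE2),
// VPADDB (AVX) and VPADDBY (AVX2).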
-let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
- VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+// Intrinsic forms
+defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
+ int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
+defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
+defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+
+let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
-
-// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1, 0>, VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
+let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-
-// Intrinsic forms
-defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
- VR256, memopv4i64, i256mem,
- SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P>;
+let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
-// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-
-} // Constraints = "$src1 = $dst"
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -3983,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -4029,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -4075,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -4132,186 +4039,109 @@ let Predicates = [UseSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
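[note] The six compare mnemonics above now expand through a single PDI_binop_all instantiation per opcode instead of separate SSE/AVX/AVX2 blocks. For reference, a minimal C sketch of the packed-compare semantics these definitions cover (illustrative only, not part of the patch; assumes SSE2 intrinsics):

    /* Each lane becomes all-ones on a hit and all-zeros otherwise. */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a  = _mm_set_epi32(1, 2, 3, 4);   /* lanes 3..0 */
        __m128i b  = _mm_set_epi32(1, 0, 3, 0);
        __m128i eq = _mm_cmpeq_epi32(a, b);       /* hits in lanes 1, 3 */
        __m128i gt = _mm_cmpgt_epi32(a, b);       /* hits in lanes 0, 2 */
        printf("%d %d\n",
               _mm_movemask_ps(_mm_castsi128_ps(eq)),   /* 10 */
               _mm_movemask_ps(_mm_castsi128_ps(gt)));  /* 5  */
        return 0;
    }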
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
-defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>;
+defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>;
+defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>;
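[note] A hedged sketch of the saturating narrowing these PACKSS*/PACKUS* definitions describe (not part of the patch; assumes SSE2 intrinsics):

    /* PACKSSWB narrows 16-bit lanes to 8 bits with signed saturation. */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_set1_epi16(300);           /* saturates to  127 */
        __m128i b = _mm_set1_epi16(-300);          /* saturates to -128 */
        signed char out[16];
        _mm_storeu_si128((__m128i *)out, _mm_packs_epi16(a, b));
        printf("%d %d\n", out[0], out[8]);         /* 127 -128 */
        return 0;
    }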
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-}
-
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def Yri : Ii8<0x70, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
-def Ymi : Ii8<0x70, MRMSrcMem,
- (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))]>;
-}
-} // ExeDomain = SSEPackedInt
-
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
+ SDNode OpNode> {
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
-
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
- TB, OpSize, VEX,VEX_L;
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
- XS, VEX, VEX_L;
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
- XD, VEX, VEX_L;
+ def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+ def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
+}
+}
+} // ExeDomain = SSEPackedInt
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize;
+defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
+defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
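[note] The merged sse2_pshuffle multiclass keeps the same immediate encoding in all three prefix variants. A short C illustration of that encoding (assumes SSE2 intrinsics; not from the patch):

    /* The 8-bit immediate holds four 2-bit source-lane selectors,
       one per destination lane. */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i v = _mm_set_epi32(3, 2, 1, 0);
        __m128i r = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3)); /* reverse */
        int out[4];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);   /* 3 2 1 0 */
        return 0;
    }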
//===---------------------------------------------------------------------===//
@@ -4327,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4336,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4344,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4426,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4435,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4444,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
let Predicates = [HasAVX] in {
@@ -4457,7 +4294,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4469,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4497,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4536,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4578,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4601,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+                          VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+                  IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4633,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4641,28 +4483,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
@@ -4670,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX;
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
+let SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4712,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
+} // SchedRW
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4726,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))],
IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
@@ -4775,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
+
+let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4786,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
+let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4800,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4819,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4828,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4858,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a bug
// in the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
//
+let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4870,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
+} // SchedRW
+let SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4886,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let AddedComplexity = 20 in {
let Predicates = [HasAVX] in {
@@ -4903,6 +4757,7 @@ let AddedComplexity = 20 in {
}
// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4913,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let SchedRW = [WriteMove] in {
let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4933,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4993,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>;
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (X86Movddup
(scalar_to_vector (loadf64 addr:$src)))))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>;
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -5059,6 +4919,7 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -5072,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
IIC_SSE_LDDQU>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -5085,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
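[note] For readers unfamiliar with the op this multiclass wires up, a minimal C example of ADDSUBPS (assumption: SSE3 available, compile with -msse3; not part of the patch):

    /* ADDSUBPS subtracts in even lanes and adds in odd lanes. */
    #include <pmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ps(8.0f, 6.0f, 4.0f, 2.0f);  /* lanes 3..0 */
        __m128 r = _mm_addsub_ps(a, _mm_set1_ps(1.0f)); /* 2-1, 4+1, 6-1, 8+1 */
        float out[4];
        _mm_storeu_ps(out, r);
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 1 5 5 9 */
        return 0;
    }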
let Predicates = [HasAVX] in {
@@ -5128,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -5143,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -5199,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
@@ -5207,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst,
(IntId128
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
- OpSize;
+ OpSize, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -5217,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (IntId256 VR256:$src))]>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
}
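[note] A hedged one-liner of the PABS unary semantics these classes model (assumes SSSE3 intrinsics; illustrative only):

    /* PABSW takes the per-lane absolute value (note -32768 wraps to itself). */
    #include <tmmintrin.h>
    #include <stdio.h>

    int main(void) {
        short out[8];
        _mm_storeu_si128((__m128i *)out, _mm_abs_epi16(_mm_set1_epi16(-7)));
        printf("%d\n", out[0]);   /* 7 */
        return 0;
    }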
let Predicates = [HasAVX] in {
@@ -5256,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
@@ -5265,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
+}
+let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
+let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;
@@ -5287,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -5295,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5309,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -5317,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5451,7 +5325,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -5459,7 +5333,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5467,63 +5341,63 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
-multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
let Predicates = [HasAVX] in
- defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
- defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGN : ssse3_palign<"palignr">;
+ defm PALIGN : ssse3_palignr<"palignr">;
let Predicates = [HasAVX2] in {
-def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
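[note] The rename to X86PAlignr/ssse3_palignr matches the actual PALIGNR behaviour, sketched here in C (assumes SSSE3 intrinsics; not part of the patch):

    /* PALIGNR concatenates the two sources (first operand high) and
       extracts a byte-shifted 16-byte window. */
    #include <tmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i lo = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                                   7,  6,  5,  4,  3,  2, 1, 0);
        __m128i hi = _mm_set_epi8(31, 30, 29, 28, 27, 26, 25, 24,
                                  23, 22, 21, 20, 19, 18, 17, 16);
        unsigned char out[16];
        _mm_storeu_si128((__m128i *)out, _mm_alignr_epi8(hi, lo, 4));
        printf("%d %d\n", out[0], out[15]);   /* 4 19 */
        return 0;
    }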
@@ -5531,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5544,6 +5419,7 @@ let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
+} // SchedRW
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -5850,6 +5726,55 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXWDYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXDQYrm addr:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQYrm addr:$src)>;
+}
+
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
@@ -5864,6 +5789,15 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5874,6 +5808,34 @@ let Predicates = [UseSSE41] in {
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5934,6 +5896,44 @@ let Predicates = [HasAVX] in {
(VPMOVZXDQrm addr:$src)>;
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
(VPMOVZXDQrm addr:$src)>;
+
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
}
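[note] The X86vsext patterns added above select the sign-extending moves, folding the scalar load in the *rm forms. A small C illustration of the register form (assumes SSE4.1 intrinsics; not from the patch):

    /* PMOVSXBW widens the low eight bytes with sign extension. */
    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i v = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, -2, 100);
        short out[8];
        _mm_storeu_si128((__m128i *)out, _mm_cvtepi8_epi16(v));
        printf("%d %d\n", out[0], out[1]);   /* 100 -2 */
        return 0;
    }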
let Predicates = [UseSSE41] in {
@@ -6273,6 +6273,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
// Operation, reg.
+ let hasSideEffects = 0 in
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6306,6 +6307,7 @@ let ExeDomain = GenericDomain in {
OpSize;
// Operation, reg.
+ let hasSideEffects = 0 in
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6378,12 +6380,47 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x3))>;
+
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (fnearbyint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0xC))>;
+ def : Pat<(v8f32 (fceil VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ def : Pat<(v8f32 (frint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x4))>;
+ def : Pat<(v8f32 (ftrunc VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x3))>;
+
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (fnearbyint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0xC))>;
+ def : Pat<(v4f64 (fceil VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ def : Pat<(v4f64 (frint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x4))>;
+ def : Pat<(v4f64 (ftrunc VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6417,8 +6454,25 @@ let Predicates = [UseSSE41] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x3))>;
}
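[note] The immediates picked by the new ffloor/fceil/frint/ftrunc/fnearbyint patterns follow the ROUNDPS control encoding: bits 1:0 select the mode (1 = down, 2 = up, 3 = toward zero), bit 2 defers to MXCSR, and bit 3 suppresses precision exceptions (so 0x4 = rint, 0xC = nearbyint). A C sketch (assumes SSE4.1 intrinsics; illustrative only):

    #include <smmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 v = _mm_set1_ps(2.5f);
        printf("%g %g %g %g\n",
               _mm_cvtss_f32(_mm_round_ps(v, 0x1)),   /* floor -> 2 */
               _mm_cvtss_f32(_mm_round_ps(v, 0x2)),   /* ceil  -> 3 */
               _mm_cvtss_f32(_mm_round_ps(v, 0x3)),   /* trunc -> 2 */
               _mm_cvtss_f32(_mm_round_ps(v, 0xC)));  /* nearbyint  */
        return 0;
    }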
//===----------------------------------------------------------------------===//
@@ -6575,67 +6629,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0>, VEX_4V;
- defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
- 0>, VEX_4V;
- defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
- 0>, VEX_4V;
- defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
- 0>, VEX_4V;
- defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
- 0>, VEX_4V;
- defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
- 0>, VEX_4V;
- defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
- 0>, VEX_4V;
- defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
- 0>, VEX_4V;
- defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
- 0>, VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw>, VEX_4V, VEX_L;
- defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
- int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
- defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
- int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
- defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
- int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
- defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
- int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
- defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
- int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
- defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
- int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
- defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
- int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
- defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
- int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
- defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
- defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
- defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
- defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
- defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
- defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
- defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
- defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
-}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6659,6 +6652,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
@@ -6776,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -6785,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
let Predicates = [HasAVX] in {
@@ -6839,31 +6902,31 @@ let Predicates = [HasAVX] in {
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
(imm:$mask))),
- (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
(imm:$mask))),
- (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
- (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6927,15 +6990,15 @@ let Predicates = [UseSSE41] in {
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
@@ -7821,6 +7884,13 @@ defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, memopv4i64, i256mem>, VEX_L;
}
+def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+ imm:$mask)),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
+ imm:$mask)),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
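
The blend-pattern hunks above correct the operand order: the old patterns emitted VBLENDPS/VBLENDPD/VPBLENDW with $src1 and $src2 swapped. The order matters because bit i of the immediate selects element i from the second source, otherwise the first source's element is kept. A scalar model of that semantics (a sketch for intuition only; blendps and the test values are made up, not code from this patch):

    #include <cstdint>
    #include <cstdio>

    // dst[i] = mask bit i ? src2[i] : src1[i] -- swapping src1/src2 inverts
    // the selection, which is the bug the Pat rewrites above fix.
    static void blendps(const float *src1, const float *src2, uint8_t mask,
                        float *dst, int n) {
      for (int i = 0; i < n; ++i)
        dst[i] = ((mask >> i) & 1) ? src2[i] : src1[i];
    }

    int main() {
      float a[4] = {0, 1, 2, 3}, b[4] = {10, 11, 12, 13}, r[4];
      blendps(a, b, /*mask=*/0x5, r, 4); // r = {10, 1, 12, 3}
      std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);
      return 0;
    }
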
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 893488c159ea..5b6298b541bc 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -15,7 +15,7 @@
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -51,6 +51,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+let hasSideEffects = 0 in {
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
"shl{b}\t$dst", [], IIC_SR>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
@@ -59,10 +60,12 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t$dst", [], IIC_SR>;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", [], IIC_SR>;
+} // hasSideEffects = 0
} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
let Uses = [CL] in {
@@ -116,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
[(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -161,9 +165,10 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
[(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -214,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
[(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -271,9 +277,10 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
[(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -328,12 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t$dst",
[(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Rotate instructions
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+let hasSideEffects = 0 in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
@@ -402,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst"
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -455,8 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
+} // SchedRW
+} // hasSideEffects = 0
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -508,8 +520,9 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -564,8 +577,9 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
[(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -616,8 +630,9 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
[(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -672,13 +687,14 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
[(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
@@ -761,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
}
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -836,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -853,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShiftLd]>;
}
}
@@ -866,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3, Sched<[WriteShift]>;
let mayLoad = 1 in
def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3,
+ Sched<[WriteShiftLd,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src1
+ ReadAfterLd]>;
}
}
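
The SchedRW annotations threaded through X86InstrShiftRotate.td above split each shift/rotate into a register form ([WriteShift]) and a memory-destination form ([WriteShiftLd, WriteRMW]), i.e. a load, the shift itself, and a store-back. A toy cost model of that decomposition (illustration only, with assumed latencies; the real numbers come from per-CPU scheduling models such as the Haswell one added later in this patch):

    #include <cstdio>

    struct Costs { int load, alu, store; }; // per-uop latencies (assumed)

    // Register form: just the ALU op -- the [WriteShift] class above.
    static int regShift(const Costs &c) { return c.alu; }

    // Memory form: load + shift + store-back -- [WriteShiftLd, WriteRMW].
    static int memShift(const Costs &c) { return c.load + c.alu + c.store; }

    int main() {
      Costs c{4, 1, 1};
      std::printf("shl %%reg: %d cycles, shl (mem): %d cycles\n",
                  regShift(c), memShift(c));
      return 0;
    }
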
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index ea716bfd6bd8..053417ccde63 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
def : Pat<(debugtrap),
(INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
+let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
+let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
"in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
+let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
+let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// Moves to and from segment registers.
//
+let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
+let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
@@ -347,16 +360,18 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
+let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt\t$dst", [], IIC_SGDT>, TB;
def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
"sidt\t$dst", []>, TB;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
@@ -374,20 +389,22 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
"lgdt\t$src", [], IIC_LGDT>, TB;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
"lidt\t$src", [], IIC_LIDT>, TB;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-
+} // SchedRW
+
//===----------------------------------------------------------------------===//
// Specialized register support
+let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
+let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
+let SchedRW = [WriteSystem] in {
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
@@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in {
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index ad55058ede6c..363a190aa854 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -15,6 +15,9 @@
//===----------------------------------------------------------------------===//
// TSX instructions
+def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
let usesCustomInserter = 1 in
def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
"# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
@@ -22,11 +25,15 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
let isBranch = 1, isTerminator = 1, Defs = [EAX] in
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
- "xbegin\t$dst", []>;
+ "xbegin\t$dst", []>, Requires<[HasRTM]>;
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
+let Defs = [EFLAGS] in
+def XTEST : I<0x01, MRM_D6, (outs), (ins),
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
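
For context, the XTEST instruction defined above is what the _xtest() compiler intrinsic lowers to: inside an RTM transaction it clears ZF, outside it sets ZF. A user-level sketch (assumes a toolchain and CPU with RTM support, e.g. compile with -mrtm; not part of this patch):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        int inTx = _xtest(); // XTEST: nonzero inside the transaction
        _xend();
        std::printf("xtest inside transaction: %d\n", inTx);
      } else {
        std::printf("transaction aborted, status 0x%x\n", status);
      }
      return 0;
    }
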
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 764aa5d4f236..44d8cce05413 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -16,7 +16,7 @@
#include "X86Relocations.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Valgrind.h"
@@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
# define CFI(x)
#endif
-// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
#if defined(X86_64_JIT)
@@ -131,12 +131,12 @@ extern "C" {
"subq $32, %rsp\n"
"movq %rbp, %rcx\n" // Pass prev frame and return address
"movq 8(%rbp), %rdx\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addq $32, %rsp\n"
#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
@@ -213,7 +213,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -269,7 +269,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -300,10 +300,7 @@ extern "C" {
SIZE(X86CompilationCallback_SSE)
);
# else
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
- static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+ void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
_declspec(naked) void X86CompilationCallback(void) {
__asm {
@@ -317,7 +314,7 @@ extern "C" {
mov eax, dword ptr [ebp+4]
mov dword ptr [esp+4], eax
mov dword ptr [esp], ebp
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
mov esp, ebp
sub esp, 12
pop ecx
@@ -337,20 +334,17 @@ extern "C" {
#endif
}
-/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// This is the target-specific function invoked by the
/// function stub when we did not know the real target of a call. This function
/// must locate the start of the stub or call site and pass it into the JIT
/// compiler function.
extern "C" {
-#if !(defined (X86_64_JIT) && defined(_MSC_VER))
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
-static
-#endif
-void LLVM_ATTRIBUTE_USED
-X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
+ intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
+ // We are reading raw stack data here. Tell MemorySanitizer that it is
+ // sufficiently initialized.
+ __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
assert(*RetAddrLoc == RetAddr &&
"Could not find return address on the stack!");
@@ -517,7 +511,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
// This used to use 0xCD, but that value is used by JITMemoryManager to
// initialize the buffer with garbage, which means it may follow a
- // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
return Result;
}
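
The __msan_unpoison call added above comes from MemorySanitizer's public interface (sanitizer/msan_interface.h). To stay buildable without MSan, such calls are normally wrapped in a feature guard; a minimal sketch of that pattern (an assumption about the surrounding plumbing -- the guard itself is not shown in the hunk above):

    #if defined(__has_feature)
    # if __has_feature(memory_sanitizer)
    #  include <sanitizer/msan_interface.h> // declares __msan_unpoison
    #  define MSAN_UNPOISON(p, size) __msan_unpoison(p, size)
    # endif
    #endif
    #ifndef MSAN_UNPOISON
    # define MSAN_UNPOISON(p, size) ((void)0) // no-op without MSan
    #endif

    int main() {
      long raw = 42;
      MSAN_UNPOISON(&raw, sizeof(raw)); // mark raw stack data initialized
      return 0;
    }
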
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index d7c08dfb0fdf..f916327378a9 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -14,8 +14,8 @@
#ifndef X86JITINFO_H
#define X86JITINFO_H
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index cfd68f74b7b2..a8a9fd8accde 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,19 +13,20 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86COFFMachineModuleInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
-#include "llvm/Type.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
@@ -238,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
unsigned Reg = MI->getOperand(OpNo+i).getReg();
- if (Reg == 0) continue;
+ // LEAs can use RIP-relative addressing, and RIP has no sub/super register.
+ if (Reg == 0 || Reg == X86::RIP) continue;
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
@@ -405,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended, but the other isn't.
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
@@ -549,18 +602,14 @@ ReSimplify:
OutMI.setOpcode(X86::RET);
break;
- case X86::MORESTACK_RET_RESTORE_R10: {
- MCInst retInst;
-
+ case X86::MORESTACK_RET_RESTORE_R10:
OutMI.setOpcode(X86::MOV64rr);
OutMI.addOperand(MCOperand::CreateReg(X86::R10));
OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
- retInst.setOpcode(X86::RET);
- AsmPrinter.OutStreamer.EmitInstruction(retInst);
+ AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
break;
}
- }
}
static void LowerTlsAddr(MCStreamer &OutStreamer,
@@ -574,11 +623,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCContext &context = OutStreamer.getContext();
- if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- }
+ if (needsPadding)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -628,20 +674,11 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
OutStreamer.EmitInstruction(LEA);
if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::REX64_PREFIX);
- OutStreamer.EmitInstruction(prefix);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX));
}
- MCInst call;
- if (is64Bits)
- call.setOpcode(X86::CALL64pcrel32);
- else
- call.setOpcode(X86::CALLpcrel32);
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
@@ -649,8 +686,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCSymbolRefExpr::VK_PLT,
context);
- call.addOperand(MCOperand::CreateExpr(tlsRef));
- OutStreamer.EmitInstruction(call);
+ OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -694,7 +732,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
- MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
// looks like:
// call "L1$pb"
@@ -703,20 +740,17 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
- TmpInst.setOpcode(X86::CALLpcrel32);
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
- OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::CALLpcrel32)
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
// popl $reg
- TmpInst.setOpcode(X86::POP32r);
- TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
@@ -746,12 +780,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::ADD32ri)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(DotExpr));
return;
}
}
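
Most of the X86MCInstLower.cpp churn above is a mechanical rewrite from building an MCInst by hand to the fluent MCInstBuilder in llvm/MC/MCInstBuilder.h. The shape of the change, reduced to a sketch (Opc and Reg stand in for the concrete X86 enums):

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstBuilder.h"
    #include "llvm/MC/MCStreamer.h"

    // Old style: a named temporary and three statements.
    void emitOld(llvm::MCStreamer &OS, unsigned Opc, unsigned Reg) {
      llvm::MCInst I;
      I.setOpcode(Opc);
      I.addOperand(llvm::MCOperand::CreateReg(Reg));
      OS.EmitInstruction(I);
    }

    // New style: one expression via MCInstBuilder.
    void emitNew(llvm::MCStreamer &OS, unsigned Opc, unsigned Reg) {
      OS.EmitInstruction(llvm::MCInstBuilder(Opc).addReg(Reg));
    }
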
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
new file mode 100644
index 000000000000..83e75ea994ca
--- /dev/null
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -0,0 +1,212 @@
+//===-------- X86PadShortFunction.cpp - Pad Short Functions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will pad short functions to prevent
+// a stall if a function returns before the return address is ready. This
+// is needed for some Intel Atom processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+ struct VisitedBBInfo {
+ // HasReturn - Whether the BB contains a return instruction
+ bool HasReturn;
+
+ // Cycles - Number of cycles until return if HasReturn is true, otherwise
+ // number of cycles until end of the BB
+ unsigned int Cycles;
+
+ VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo(bool HasReturn, unsigned int Cycles)
+ : HasReturn(HasReturn), Cycles(Cycles) {}
+ };
+
+ struct PadShortFunc : public MachineFunctionPass {
+ static char ID;
+ PadShortFunc() : MachineFunctionPass(ID)
+ , Threshold(4), TM(0), TII(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Atom pad short functions";
+ }
+
+ private:
+ void findReturns(MachineBasicBlock *MBB,
+ unsigned int Cycles = 0);
+
+ bool cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles);
+
+ void addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd);
+
+ const unsigned int Threshold;
+
+ // ReturnBBs - Maps basic blocks that return to the minimum number of
+ // cycles until the return, starting from the entry block.
+ DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+
+ // VisitedBBs - Cache of previously visited BBs.
+ DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ };
+
+ char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+ return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+ const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize)) {
+ return false;
+ }
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+
+ // Search through basic blocks and mark the ones that have early returns
+ ReturnBBs.clear();
+ VisitedBBs.clear();
+ findReturns(MF.begin());
+
+ bool MadeChange = false;
+
+ MachineBasicBlock *MBB;
+ unsigned int Cycles = 0;
+
+ // Pad the identified basic blocks with NOOPs
+ for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
+ I != ReturnBBs.end(); ++I) {
+ MBB = I->first;
+ Cycles = I->second;
+
+ if (Cycles < Threshold) {
+ // BB ends in a return. Skip over any DBG_VALUE instructions
+ // trailing the terminator.
+ assert(MBB->size() > 0 &&
+ "Basic block should contain at least a RET but is empty");
+ MachineBasicBlock::iterator ReturnLoc = --MBB->end();
+
+ while (ReturnLoc->isDebugValue())
+ --ReturnLoc;
+ assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
+ "Basic block does not end with RET");
+
+ addPadding(MBB, ReturnLoc, Threshold - Cycles);
+ NumBBsPadded++;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// findReturns - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
+ // If this BB has a return, note how many cycles it takes to get there.
+ bool hasReturn = cyclesUntilReturn(MBB, Cycles);
+ if (Cycles >= Threshold)
+ return;
+
+ if (hasReturn) {
+ ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
+ return;
+ }
+
+ // Follow branches in BB and look for returns
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
+ I != MBB->succ_end(); ++I) {
+ if (*I == MBB)
+ continue;
+ findReturns(*I, Cycles);
+ }
+}
+
+/// cyclesUntilReturn - Return true if the MBB has a return instruction,
+/// false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
+bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles) {
+ // Return cached result if BB was previously visited
+ DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
+ = VisitedBBs.find(MBB);
+ if (it != VisitedBBs.end()) {
+ VisitedBBInfo BBInfo = it->second;
+ Cycles += BBInfo.Cycles;
+ return BBInfo.HasReturn;
+ }
+
+ unsigned int CyclesToEnd = 0;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin();
+ MBBI != MBB->end(); ++MBBI) {
+ MachineInstr *MI = MBBI;
+ // Mark basic blocks with a return instruction. Calls to other
+ // functions do not count because the called function will be padded,
+ // if necessary.
+ if (MI->isReturn() && !MI->isCall()) {
+ VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return true;
+ }
+
+ CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ }
+
+ VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return false;
+}
+
+/// addPadding - Add the given number of NOOP instructions to the function
+/// just prior to the return at MBBI.
+void PadShortFunc::addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ while (NOOPsToAdd-- > 0) {
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ }
+}
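
A quick worked example of the padding math in the new pass: Threshold is fixed at 4, so a block whose earliest return is reachable in 2 scheduled cycles gets addPadding(MBB, ReturnLoc, 2). Note from the loop above that each unit of padding emits two NOOPs:

    #include <cstdio>

    int main() {
      unsigned Threshold = 4, Cycles = 2;  // values as used by the pass above
      unsigned Units = Threshold - Cycles; // third argument to addPadding
      unsigned Noops = Units * 2;          // the while loop emits two per unit
      std::printf("insert %u NOOPs before the return\n", Noops); // -> 4
      return 0;
    }
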
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 73ac7477427f..16886e432d19 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -19,25 +19,25 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
@@ -50,16 +50,18 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
-cl::opt<bool>
+static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP,
+ : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP)),
TM(tm), TII(tii) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
@@ -175,21 +177,27 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
case 1: // Normal GPRs except the stack pointer (for encoding reasons).
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ if (Subtarget.isTargetWin64())
return &X86::GR64_TCW64RegClass;
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ else if (Subtarget.is64Bit())
return &X86::GR64_TCRegClass;
+
+ const Function *F = MF.getFunction();
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
}
}
@@ -227,36 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool callsEHReturn = false;
- bool ghcCall = false;
- bool oclBiCall = false;
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
-
- if (MF) {
- callsEHReturn = MF->getMMI().callsEHReturn();
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
- }
-
- if (ghcCall)
+ switch (MF->getFunction()->getCallingConv()) {
+ case CallingConv::GHC:
+ case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
- if (oclBiCall) {
+
+ case CallingConv::Intel_OCL_BI: {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (HasAVX && IsWin64)
- return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
if (HasAVX && Is64Bit)
- return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
if (!HasAVX && !IsWin64 && Is64Bit)
- return CSR_64_Intel_OCL_BI_SaveList;
+ return CSR_64_Intel_OCL_BI_SaveList;
+ break;
+ }
+
+ case CallingConv::Cold:
+ if (Is64Bit)
+ return CSR_MostRegs_64_SaveList;
+ break;
+
+ default:
+ break;
}
+
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
}
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_32EHRet_SaveList;
return CSR_32_SaveList;
}
@@ -273,10 +285,12 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
- if (CC == CallingConv::GHC)
+ if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
+ if (CC == CallingConv::Cold)
+ return CSR_MostRegs_64_RegMask;
if (IsWin64)
return CSR_Win64_RegMask;
return CSR_64_RegMask;
@@ -380,7 +394,13 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ //
+ // This is also true if the function contains MS-style inline assembly. We
+ // do this because if any stack changes occur in the inline assembly, e.g.,
+ // "pusha", then any C local variable or C argument references in the
+ // inline assembly will be wrong because the SP is not properly tracked.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) ||
+ MF.hasMSInlineAsm())
return true;
return false;
@@ -410,7 +430,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
@@ -430,123 +451,16 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
- return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
- return X86::SUB32ri;
- }
-}
-
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::ADD64ri8;
- return X86::ADD64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::ADD32ri8;
- return X86::ADD32ri;
- }
-}
-
-void X86RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
- int Opcode = I->getOpcode();
- bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reseveCallFrame) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub ESP, <amt>' and the
- // adjcallstackdown instruction into 'add ESP, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- if (Amount == 0)
- return;
-
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
- }
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
- return;
- }
-
- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(CalleeAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // We are not tracking the stack pointer adjustment by the callee, so make
- // sure we restore the stack pointer immediately after the call, there may
- // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
- MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->isCall())
- --I;
- MBB.insert(I, New);
- }
-}
-
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
unsigned Opc = MI.getOpcode();
@@ -562,7 +476,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
// Now add the frame object offset to the offset from EBP.
int FIOffset;
@@ -573,17 +487,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else
FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
- if (MI.getOperand(i+3).isImm()) {
+ if (MI.getOperand(FIOperandNum+3).isImm()) {
// Offset is a 32-bit integer.
- int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
int Offset = FIOffset + Imm;
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
"Requesting 64-bit offset in 32-bit immediate!");
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
- uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
- MI.getOperand(i+3).setOffset(Offset);
+ uint64_t Offset = FIOffset +
+ (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
+ MI.getOperand(FIOperandNum + 3).setOffset(Offset);
}
}
@@ -608,7 +523,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
case MVT::i8:
if (High) {
switch (Reg) {
- default: return getX86SubSuperRegister(Reg, MVT::i64, High);
+ default: return getX86SubSuperRegister(Reg, MVT::i64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -728,22 +651,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
return X86::R15D;
}
case MVT::i64:
- // For 64-bit mode if we've requested a "high" register and the
- // Q or r constraints we want one of these high registers or
- // just the register name otherwise.
- if (High) {
- switch (Reg) {
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- // Fallthrough.
- }
- }
switch (Reg) {
default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
@@ -782,46 +689,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
}
-
-namespace {
- struct MSAH : public MachineFunctionPass {
- static char ID;
- MSAH() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF.getTarget());
- const TargetFrameLowering *TFI = TM->getFrameLowering();
- MachineRegisterInfo &RI = MF.getRegInfo();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = TFI->getStackAlignment();
-
- // Be over-conservative: scan over all vreg defs and find whether vector
- // registers are used. If yes, there is a possibility that vector register
- // will be spilled and thus require dynamic stack realignment.
- for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setForceFramePointer(true);
- return true;
- }
- }
- // Nothing to do
- return false;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Check";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
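
The getX86SubSuperRegister hunks above move the SI/DI/BP/SP handling for High requests out of the MVT::i64 case and into the MVT::i8 case, so the function no longer recurses with High set. The observable behavior, as a sketch (these calls only compile inside the X86 backend):

    // The RAX family has a high-8 subregister, so the i8/High query yields it:
    unsigned R1 = getX86SubSuperRegister(X86::RAX, MVT::i8, /*High=*/true);
    // R1 == X86::AH

    // RSI has no high-8 register; the new i8 case returns the 16-bit
    // register directly instead of falling through the i64 path:
    unsigned R2 = getX86SubSuperRegister(X86::RSI, MVT::i8, /*High=*/true);
    // R2 == X86::SI
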
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7932ede8dd65..b9d7b8cf8b9a 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,12 +117,9 @@ public:
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
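
With the new eliminateFrameIndex signature above, the frame-index operand scan that the removed loop in X86RegisterInfo.cpp used to perform now happens in the target-independent caller. A sketch of that caller-side scan (an assumption about the generic code, which is not part of this diff; II is the MachineBasicBlock::iterator at MI):

    unsigned FIOperandNum = 0;
    while (!MI.getOperand(FIOperandNum).isFI()) {
      ++FIOperandNum;
      assert(FIOperandNum < MI.getNumOperands() &&
             "Instr doesn't have FrameIndex operand!");
    }
    TRI->eliminateFrameIndex(II, /*SPAdj=*/0, FIOperandNum, RS);
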
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
new file mode 100644
index 000000000000..7de6791f2e48
--- /dev/null
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -0,0 +1,126 @@
+//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Haswell to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def HaswellModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and HW can decode 4
+ // instructions per cycle.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 40;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = HaswellModel in {
+
+// Haswell can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, 6 and 7 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def HWPort0 : ProcResource<1>;
+def HWPort1 : ProcResource<1>;
+def HWPort2 : ProcResource<1>;
+def HWPort3 : ProcResource<1>;
+def HWPort4 : ProcResource<1>;
+def HWPort5 : ProcResource<1>;
+def HWPort6 : ProcResource<1>;
+def HWPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
+def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
+def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
+def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
+
+// Integer division issued on port 0.
+def HWDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+  // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
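+
+// For example (illustrative expansion, not part of this patch): the
+// "defm : HWWriteResPair<WriteALU, HWPort0156, 1>" below produces:
+//   def : WriteRes<WriteALU,   [HWPort0156]>           { let Latency = 1; }
+//   def : WriteRes<WriteALULd, [HWPort23, HWPort0156]> { let Latency = 5; }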
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [HWPort4]>;
+
+def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
+def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [HWPort0156]>;
+def : WriteRes<WriteZero, []>;
+
+defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
+defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
+defm : HWWriteResPair<WriteShift, HWPort056, 1>;
+defm : HWWriteResPair<WriteJump, HWPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [HWPort15]>;
+
+// This is quite rough; latency depends on the dividend.
+def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
+defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
+defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
+defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
+defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
+defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
+
+// Vector integer operations.
+defm : HWWriteResPair<WriteVecShift, HWPort05, 1>;
+defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
+defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
+defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteShuffle, HWPort15, 1>;
+
+def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
new file mode 100644
index 000000000000..74d5f1b6eba8
--- /dev/null
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -0,0 +1,122 @@
+//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Sandy Bridge to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SandyBridgeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and SB can decode 4
+ // instructions per cycle.
+ // FIXME: Identify instructions that aren't a single fused micro-op.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 30;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = SandyBridgeModel in {
+
+// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
+
+// Ports 0, 1, and 5 handle all computation.
+def SBPort0 : ProcResource<1>;
+def SBPort1 : ProcResource<1>;
+def SBPort5 : ProcResource<1>;
+
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores.
+def SBPort23 : ProcResource<2>;
+
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+def SBPort4 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
+def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
+def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
+
+// Integer division issued on port 0.
+def SBDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+  // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
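+
+// For example (illustrative expansion, not part of this patch): the
+// "defm : SBWriteResPair<WriteIMul, SBPort1, 3>" below produces:
+//   def : WriteRes<WriteIMul,   [SBPort1]>           { let Latency = 3; }
+//   def : WriteRes<WriteIMulLd, [SBPort23, SBPort1]> { let Latency = 7; }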
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SBPort4]>;
+
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
+
+defm : SBWriteResPair<WriteALU, SBPort015, 1>;
+defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
+defm : SBWriteResPair<WriteShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteJump, SBPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [SBPort15]>;
+
+// This is quite rough; latency depends on the dividend.
+def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
+defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
+defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
+defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
+defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
+
+// Vector integer operations.
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
+defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
+
+def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f9ac1b..9fbde88b7100 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,9 +7,94 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
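+
+// For example (illustrative expansion, not part of this patch):
+// "defm WriteALU : X86SchedWritePair" below produces:
+//   def WriteALULd : SchedWrite;
+//   def WriteALU : X86FoldableSchedWrite { let Folded = WriteALULd; }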
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+
+// FMA Scheduling helper class.
+class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody uses.
+def WriteMicrocoded : SchedWrite;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
-def IIC_DEFAULT : InstrItinClass;
def IIC_ALU_MEM : InstrItinClass;
def IIC_ALU_NONMEM : InstrItinClass;
def IIC_LEA : InstrItinClass;
@@ -470,12 +555,19 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for in-order and nonzero for OOO processors.
+//
// The GenericModel contains no instruction itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0;
let LoadLatency = 4;
let HighLatency = 10;
+ let ILPWindow = 10;
}
include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 87102614cc8b..cce8f1b11436 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries<
// InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
//
// Default is 1 cycle, port0 or port1
- InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
@@ -525,6 +524,7 @@ def AtomModel : SchedMachineModel {
// OperandCycles may be used for expected latency.
let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 723e50cc1886..f934fdd85914 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "x86-selectiondag-info"
#include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
@@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
+  // ESI might be used as a base pointer; in that case we can't simply
+  // overwrite the register. Fall back to generic code.
+ const X86RegisterInfo *TRI =
+ static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI)
+ return SDValue();
+
MVT AVT;
if (Align & 1)
AVT = MVT::i8;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d1ed68028771..14619b63927b 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -14,11 +14,13 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -35,8 +37,7 @@ using namespace llvm;
/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
-unsigned char X86Subtarget::
-ClassifyBlockAddressReference() const {
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
@@ -155,6 +156,12 @@ const char *X86Subtarget::getBZeroEntry() const {
return 0;
}
+bool X86Subtarget::hasSinCos() const {
+ return getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+ is64Bit();
+}
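+
+// For example (illustrative): on targets where this returns true, the
+// backend can merge a matching sin()/cos() pair into a single call to a
+// combined runtime entry point such as __sincos_stret.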
+
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
@@ -234,12 +241,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
ToggleFeature(X86::FeatureSlowBTMem);
}
- // If it's Nehalem, unaligned memory access is fast.
- // Include Westmere and Sandy Bridge as well.
- // FIXME: add later processors.
- if (IsIntel && ((Family == 6 && Model == 26) ||
- (Family == 6 && Model == 44) ||
- (Family == 6 && Model == 42))) {
+  // If it's an Intel chip from Nehalem onward and not an Atom chip, unaligned
+  // memory access is fast. We hard-code model numbers here because they
+  // aren't strictly increasing for Intel chips, it seems.
+ if (IsIntel &&
+ ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+                                         // Jasper Forest
+ (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
+ (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+ (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+ (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+ (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+ (Family == 6 && Model == 0x2A) || // SandyBridge
+ (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+ (Family == 6 && Model == 0x3A))) {// IvyBridge
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
@@ -267,6 +282,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasLZCNT = true;
ToggleFeature(X86::FeatureLZCNT);
}
+ if (IsIntel && ((ECX >> 8) & 0x1)) {
+ HasPRFCHW = true;
+ ToggleFeature(X86::FeaturePRFCHW);
+ }
if (IsAMD) {
if ((ECX >> 6) & 0x1) {
HasSSE4A = true;
@@ -294,6 +313,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasBMI = true;
ToggleFeature(X86::FeatureBMI);
}
+ if ((EBX >> 4) & 0x1) {
+ HasHLE = true;
+ ToggleFeature(X86::FeatureHLE);
+ }
if (IsIntel && ((EBX >> 5) & 0x1)) {
X86SSELevel = AVX2;
ToggleFeature(X86::FeatureAVX2);
@@ -306,48 +329,35 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 18) & 0x1)) {
+ HasRDSEED = true;
+ ToggleFeature(X86::FeatureRDSEED);
+ }
}
}
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
- unsigned StackAlignOverride, bool is64Bit)
- : X86GenSubtargetInfo(TT, CPU, FS)
- , X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , HasRTM(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
- , TargetTriple(TT)
- , In64BitMode(is64Bit) {
- // Determine default and user specified characteristics
+void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+    !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
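+
+// Illustrative example (not from this patch): a function whose attribute set
+// carries, e.g.,
+//   "target-cpu"="corei7" "target-features"="+avx"
+// has its feature bits recomputed above before code generation; functions
+// without a "target-features" attribute keep the module-level subtarget.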
+
+void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
if (CPUName.empty()) {
@@ -424,6 +434,57 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() {
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasHLE = false;
+ HasADX = false;
+ HasPRFCHW = false;
+ HasRDSEED = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ CallRegIndirect = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
+ , PICStyle(PICStyles::None)
+ , TargetTriple(TT)
+ , StackAlignOverride(StackAlignOverride)
+ , In64BitMode(is64Bit) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8bf4cc77f762..6fbdb1d5f00f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,8 +14,8 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
-#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -121,6 +121,18 @@ protected:
/// HasRTM - Processor has RTM instructions.
bool HasRTM;
+ /// HasHLE - Processor has HLE.
+ bool HasHLE;
+
+ /// HasADX - Processor has ADX instructions.
+ bool HasADX;
+
+ /// HasPRFCHW - Processor has PRFCHW instructions.
+ bool HasPRFCHW;
+
+ /// HasRDSEED - Processor has RDSEED instructions.
+ bool HasRDSEED;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -146,6 +158,14 @@ protected:
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
+  /// PadShortFunctions - True if short functions should be padded to prevent
+  /// a stall when returning too early.
+ bool PadShortFunctions;
+
+  /// CallRegIndirect - True if calls with a memory operand should be
+  /// converted to a register-based indirect call.
+ bool CallRegIndirect;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -161,11 +181,13 @@ protected:
InstrItineraryData InstrItins;
private:
+ /// StackAlignOverride - Override the stack alignment.
+ unsigned StackAlignOverride;
+
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
public:
-
/// This constructor initializes the data members to match that
/// of the specified triple.
///
@@ -190,7 +212,26 @@ public:
/// instruction.
void AutoDetectSubtargetFeatures();
- bool is64Bit() const { return In64BitMode; }
+ /// \brief Reset the features for the X86 target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
+ /// Is this x86_64? (disregarding specific ABI / programming model)
+ bool is64Bit() const {
+ return In64BitMode;
+ }
+
+ /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
+ bool isTarget64BitILP32() const {
+ return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32);
+ }
+
+ /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+ bool isTarget64BitLP64() const {
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ }
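+
+  // For example (illustrative): a triple of x86_64-linux-gnux32 selects the
+  // x32 ABI (isTarget64BitILP32), while x86_64-linux-gnu selects LP64.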
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -205,6 +246,8 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
+ bool hasFp256() const { return hasAVX(); }
+ bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
@@ -223,12 +266,18 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasRTM() const { return HasRTM; }
+ bool hasHLE() const { return HasHLE; }
+ bool hasADX() const { return HasADX; }
+ bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
+ bool padShortFunctions() const { return PadShortFunctions; }
+ bool callRegIndirect() const { return CallRegIndirect; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
@@ -247,7 +296,7 @@ public:
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NativeClient;
+ return TargetTriple.getOS() == Triple::NaCl;
}
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
@@ -308,6 +357,10 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
+
+  /// This function returns true if the target has a sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 158f9dc06693..8aa58a204260 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -13,13 +13,13 @@
#include "X86TargetMachine.h"
#include "X86.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
extern "C" void LLVMInitializeX86Target() {
@@ -46,10 +46,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
"e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
"n8:16:32-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this),
+ JITInfo(*this) {
}
void X86_64TargetMachine::anchor() { }
@@ -60,13 +59,16 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
- DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
- "n8:16:32:64-S128"),
+ // The x32 ABI dictates the ILP32 programming model for x64.
+ DL(getSubtargetImpl()->isTarget64BitILP32() ?
+ "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128" :
+ "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo){
+ TSInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
@@ -121,6 +123,19 @@ X86EarlyIfConv("x86-early-ifcvt",
cl::desc("Enable early if-conversion on X86"));
//===----------------------------------------------------------------------===//
+// X86 Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // First add the target-independent BasicTTI pass, then our X86 pass. This
+  // allows the X86 pass to delegate to the target-independent layer when
+  // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createX86TargetTransformInfoPass(this));
+}
+
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -140,6 +155,7 @@ public:
}
virtual bool addInstSelector();
+ virtual bool addILPOpts();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreEmitPass();
@@ -147,12 +163,7 @@ public:
} // namespace
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- X86PassConfig *PC = new X86PassConfig(this, PM);
-
- if (X86EarlyIfConv && Subtarget.hasCMov())
- PC->enablePass(&EarlyIfConverterID);
-
- return PC;
+ return new X86PassConfig(this, PM);
}
bool X86PassConfig::addInstSelector() {
@@ -170,8 +181,15 @@ bool X86PassConfig::addInstSelector() {
return false;
}
+bool X86PassConfig::addILPOpts() {
+ if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
+ addPass(&EarlyIfConverterID);
+ return true;
+ }
+ return false;
+}
+
bool X86PassConfig::addPreRegAlloc() {
- addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
@@ -192,6 +210,12 @@ bool X86PassConfig::addPreEmitPass() {
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().padShortFunctions()) {
+ addPass(createX86PadShortFunctions());
+ ShouldPrint = true;
+ }
+
return ShouldPrint;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 12311a1abfbd..174d3918318d 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -15,16 +15,15 @@
#define X86TARGETMACHINE_H
#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86ISelLowering.h"
#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -65,6 +64,9 @@ public:
return &InstrItins;
}
+ /// \brief Register X86 analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Set up the pass pipeline.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -78,11 +80,9 @@ class X86_32TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -101,12 +101,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -115,11 +109,9 @@ class X86_64TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -138,12 +130,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 92aee0dd3fcf..871dacd6a1c1 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,23 +8,19 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
+
using namespace llvm;
using namespace dwarf;
const MCExpr *X86_64MachoTargetObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
@@ -37,7 +33,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFileMachO::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *X86_64MachoTargetObjectFile::
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 2d320c594cb9..9d26d389d4de 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -11,8 +11,8 @@
#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -21,9 +21,9 @@ namespace llvm {
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to
// .cfi_personality.
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
new file mode 100644
index 000000000000..a98c6991192c
--- /dev/null
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -0,0 +1,495 @@
+//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+ const X86TargetMachine *TM;
+ const X86Subtarget *ST;
+ const X86TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ X86TTI(const X86TargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeX86TTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+ "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+ return new X86TTI(TM);
+}
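+
+// Typically registered from X86TargetMachine::addAnalysisPasses, e.g.:
+//   PM.add(createX86TargetTransformInfoPass(this));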
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+  // call ST->hasSSE3() instead of ST->hasSSE41().
+ return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasSSE1())
+ return 0;
+
+ if (ST->is64Bit())
+ return 16;
+ return 8;
+}
+
+unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAVX()) return 256;
+ if (ST->hasSSE1()) return 128;
+ return 0;
+ }
+
+ if (ST->is64Bit())
+ return 64;
+ return 32;
+
+}
+
+unsigned X86TTI::getMaximumUnrollFactor() const {
+ if (ST->isAtom())
+ return 1;
+
+ // Sandybridge and Haswell have multiple execution ports and pipelined
+ // vector units.
+ if (ST->hasAVX())
+ return 4;
+
+ return 2;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
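+  // For example (illustrative): on SSE2, v8i32 legalizes to two v4i32
+  // operations, so LT.first == 2 and LT.second == MVT::v4i32; the table
+  // costs below are scaled by LT.first.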
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+    // Shifts on v4i64/v8i32 are legal on AVX2, even though we declare them
+    // custom in order to detect cases where the shift amount is a scalar.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+
+ { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+ // Constant splats are cheaper for the following instructions.
+ { ISD::SHL, MVT::v16i8, 1 }, // psllw.
+ { ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v2i64, 1 }, // psllq.
+
+ { ISD::SRL, MVT::v16i8, 1 }, // psrlw.
+ { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+
+ { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
+ array_lengthof(SSE2UniformConstCostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+ }
+
+
+ static const CostTblEntry<MVT> SSE2CostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+    // In some cases, where the shift amount is a scalar, we could generate
+    // better code. Unfortunately, when this is the case the value
+ // (the splat) will get hoisted out of the loop, thereby making it invisible
+ // to ISel. The cost model must return worst case assumptions because it is
+ // used for vectorization and we don't want to make vectorized code worse
+ // than scalar code.
+ { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ };
+
+ if (ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
+    // are lowered as a series of long multiplies (3), shifts (4) and adds (2).
+ // Because we believe v4i64 to be a legal type, we must also include the
+ // split factor of two in the cost table. Therefore, the cost here is 18
+ // instead of 9.
+ { ISD::MUL, MVT::v4i64, 18 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST->hasAVX() && !ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX1CostTable[Idx].Cost;
+ }
+
+ // Custom lowering of vectors.
+ static const CostTblEntry<MVT> CustomLowered[] = {
+    // A v2i64/v4i64 multiply is custom lowered as a series of long
+    // multiplies (3), shifts (4) and adds (2).
+ { ISD::MUL, MVT::v2i64, 9 },
+ { ISD::MUL, MVT::v4i64, 9 },
+ };
+ int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * CustomLowered[Idx].Cost;
+
+  // Special lowering of v4i32 mul on SSE2/SSE3: lower v4i32 mul as 2x shuffle,
+  // 2x pmuludq, 2x shuffle.
+ if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
+ !ST->hasSSE41())
+ return 6;
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only estimate the cost of reverse shuffles.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
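+  // For example (illustrative): reversing a v8f32 on AVX legalizes to a
+  // single 256-bit part (LT.first == 1), so the estimated cost is 3 * 1 = 3.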
+
+  // Multiply by the number of parts.
+ return Cost * LT.first;
+}
+
+unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+ };
+
+ static const CostTblEntry<MVT> AVX1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f64, 1 },
+ { ISD::SETCC, MVT::v8f32, 1 },
+ // AVX1 does not support 8-wide integer compare.
+ { ISD::SETCC, MVT::v4i64, 4 },
+ { ISD::SETCC, MVT::v8i32, 4 },
+ { ISD::SETCC, MVT::v16i16, 4 },
+ { ISD::SETCC, MVT::v32i8, 4 },
+ };
+
+ static const CostTblEntry<MVT> AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+ };
+
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX2CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // Floating point scalars are already located in index #0.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ }
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+  // On Sandy Bridge, 256-bit load/stores are double pumped
+  // (but not on Haswell).
+  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+    Cost *= 2;
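+  // For example (illustrative): a 256-bit vector load on AVX without AVX2
+  // (e.g. Sandy Bridge) legalizes to one part, so the cost is 1 * 2 = 2.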
+
+ return Cost;
+}
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index c4a58874a414..0f77948c0eff 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
return false;
}
+static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+  for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
+ return true;
+}
+
static bool hasYmmReg(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ return true;
if (!MO.isReg())
continue;
if (MO.isDebug())
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index ca94f03a6496..099ad390d2a7 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS XCore.td)
tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM XCoreGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv)
@@ -15,6 +16,7 @@ add_llvm_target(XCoreCodeGen
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
XCoreMachineFunctionInfo.cpp
+ XCoreMCInstLower.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -24,5 +26,7 @@ add_llvm_target(XCoreCodeGen
add_dependencies(LLVMXCoreCodeGen intrinsics_gen)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/Disassembler/CMakeLists.txt b/lib/Target/XCore/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..cdc5d993b8bf
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMXCoreDisassembler
+ XCoreDisassembler.cpp
+ )
+
+add_dependencies(LLVMXCoreDisassembler XCoreCommonTableGen)
diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..028de2cb3433
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreDisassembler
+parent = XCore
+required_libraries = MC Support XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/Disassembler/Makefile b/lib/Target/XCore/Disassembler/Makefile
new file mode 100644
index 000000000000..4caffdd1da6a
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/Disassembler/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDisassembler
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
new file mode 100644
index 000000000000..7b99967c4f32
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -0,0 +1,800 @@
+//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the XCore Disassembler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// \brief A disassembler class for XCore.
+class XCoreDisassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
+ MCDisassembler(STI), RegInfo(Info) {}
+
+ /// \brief See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+} // end of anonymous namespace
+
+static bool readInstruction16(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint16_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 2 Bytes of data.
+ if (region.readBytes(address, 2, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 16-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ return true;
+}
+
+static bool readInstruction32(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint32_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 4 Bytes of data.
+ if (region.readBytes(address, 4, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ return true;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus Decode2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "XCoreGenDisassemblerTables.inc"
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 11)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val > 11)
+ return MCDisassembler::Fail;
+ static const unsigned Values[] = {
+ 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
+ };
+ Inst.addOperand(MCOperand::CreateImm(Values[Val]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined < 27)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 5, 1)) {
+ if (Combined == 31)
+ return MCDisassembler::Fail;
+ Combined += 5;
+ }
+ Combined -= 27;
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = Combined / 3;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2,
+ unsigned &Op3) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined >= 27)
+ return MCDisassembler::Fail;
+
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = (Combined / 3) % 3;
+ unsigned Op3High = Combined / 9;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as a 3R instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5);
+ switch (Opcode) {
+ case 0x0:
+ Inst.setOpcode(XCore::STW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x1:
+ Inst.setOpcode(XCore::LDW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x2:
+ Inst.setOpcode(XCore::ADD_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3:
+ Inst.setOpcode(XCore::SUB_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4:
+ Inst.setOpcode(XCore::SHL_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5:
+ Inst.setOpcode(XCore::SHR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6:
+ Inst.setOpcode(XCore::EQ_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7:
+ Inst.setOpcode(XCore::AND_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8:
+ Inst.setOpcode(XCore::OR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9:
+ Inst.setOpcode(XCore::LDW_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10:
+ Inst.setOpcode(XCore::LD16S_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11:
+ Inst.setOpcode(XCore::LD8U_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12:
+ Inst.setOpcode(XCore::ADD_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x13:
+ Inst.setOpcode(XCore::SUB_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14:
+ Inst.setOpcode(XCore::SHL_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x15:
+ Inst.setOpcode(XCore::SHR_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x16:
+ Inst.setOpcode(XCore::EQ_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x17:
+ Inst.setOpcode(XCore::TSETR_3r);
+ return Decode3RImmInstruction(Inst, Insn, Address, Decoder);
+ case 0x18:
+ Inst.setOpcode(XCore::LSS_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19:
+ Inst.setOpcode(XCore::LSU_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op2));
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as an L3R / L2RUS instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 16, 4) |
+ fieldFromInstruction(Insn, 27, 5) << 4;
+ switch (Opcode) {
+ case 0x0c:
+ Inst.setOpcode(XCore::STW_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x1c:
+ Inst.setOpcode(XCore::XOR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x2c:
+ Inst.setOpcode(XCore::ASHR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3c:
+ Inst.setOpcode(XCore::LDAWF_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4c:
+ Inst.setOpcode(XCore::LDAWB_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5c:
+ Inst.setOpcode(XCore::LDA16F_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6c:
+ Inst.setOpcode(XCore::LDA16B_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7c:
+ Inst.setOpcode(XCore::MUL_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8c:
+ Inst.setOpcode(XCore::DIVS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9c:
+ Inst.setOpcode(XCore::DIVU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10c:
+ Inst.setOpcode(XCore::ST16_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11c:
+ Inst.setOpcode(XCore::ST8_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12c:
+ Inst.setOpcode(XCore::ASHR_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12d:
+ Inst.setOpcode(XCore::OUTPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12e:
+ Inst.setOpcode(XCore::INPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x13c:
+ Inst.setOpcode(XCore::LDAWF_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14c:
+ Inst.setOpcode(XCore::LDAWB_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x15c:
+ Inst.setOpcode(XCore::CRC_l3r);
+ return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder);
+ case 0x18c:
+ Inst.setOpcode(XCore::REMS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19c:
+ Inst.setOpcode(XCore::REMU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5, Op6;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return S;
+ S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6);
+ if (S != MCDisassembler::Success)
+ return S;
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as an L6R instruction.
+ Inst.clear();
+ unsigned Opcode = fieldFromInstruction(Insn, 27, 5);
+ switch (Opcode) {
+ case 0x00:
+ Inst.setOpcode(XCore::LMUL_l6r);
+ return DecodeL6RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+ S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+MCDisassembler::DecodeStatus
+XCoreDisassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ uint16_t insn16;
+
+ if (!readInstruction16(Region, Address, Size, insn16)) {
+ return Fail;
+ }
+
+ // Call the auto-generated decoder function.
+ DecodeStatus Result = decodeInstruction(DecoderTable16, instr, insn16,
+ Address, this, STI);
+ if (Result != Fail) {
+ Size = 2;
+ return Result;
+ }
+
+ uint32_t insn32;
+
+ if (!readInstruction32(Region, Address, Size, insn32)) {
+ return Fail;
+ }
+
+ // Call the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTable32, instr, insn32, Address, this, STI);
+ if (Result != Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return Fail;
+}
+
+namespace llvm {
+ extern Target TheXCoreTarget;
+}
+
+static MCDisassembler *createXCoreDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new XCoreDisassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeXCoreDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheXCoreTarget,
+ createXCoreDisassembler);
+}
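
Note: Decode2OpInstruction and Decode3OpInstruction above undo XCore's operand packing. Each operand is a GR register number in 0-11: its low two bits are stored directly in the instruction, while the high parts (each 0-2) are packed base-3 into the 5-bit field at bit 6, biased by 27 in the two-operand form, with bit 5 extending the range. A standalone model of the two-operand case, with the validity checks omitted:

// Sketch of the 2-operand field decode (not the shipping decoder: the
// range checks that return Fail are left out for brevity).
#include <cstdint>
#include <utility>

static std::pair<unsigned, unsigned> decode2Op(uint16_t Insn) {
  unsigned Combined = (Insn >> 6) & 0x1f;  // fieldFromInstruction(Insn, 6, 5)
  if (Insn & (1 << 5))                     // bit 5 selects the upper range
    Combined += 5;
  Combined -= 27;                          // remove the bias; now 0..8
  unsigned Op1 = ((Combined % 3) << 2) | ((Insn >> 2) & 0x3);
  unsigned Op2 = ((Combined / 3) << 2) | (Insn & 0x3);
  return std::make_pair(Op1, Op2);
}
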
diff --git a/lib/Target/XCore/InstPrinter/CMakeLists.txt b/lib/Target/XCore/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..930e733cd7f1
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreAsmPrinter
+ XCoreInstPrinter.cpp
+ )
+
+add_dependencies(LLVMXCoreAsmPrinter XCoreCommonTableGen)
diff --git a/lib/Target/XCore/InstPrinter/LLVMBuild.txt b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..8750bc7acedc
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreAsmPrinter
+parent = XCore
+required_libraries = MC Support
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/InstPrinter/Makefile b/lib/Target/XCore/InstPrinter/Makefile
new file mode 100644
index 000000000000..1c1c61299c39
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/InstPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreAsmPrinter
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
new file mode 100644
index 000000000000..1592351c3861
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an XCore MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCoreInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void XCoreInstPrinter::
+printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT");
+}
+
+void XCoreInstPrinter::
+printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT32");
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ } else {
+ SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+ assert(SRE && "Unexpected MCExpr type.");
+ }
+ assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+}
+
+void XCoreInstPrinter::
+printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ printExpr(Op.getExpr(), O);
+}
+
+void XCoreInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
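
Note: printExpr above only ever emits a symbol with an optional constant addend, and the sign handling is the one subtlety: a positive offset needs an explicit '+', a negative one already carries its sign, and zero is omitted. The same rule in a standalone form:

// Minimal model of the symbol+offset formatting ("foo", "foo+4", "foo-8").
#include <ostream>

static void printSymPlusOffset(std::ostream &OS, const char *Sym, int Offset) {
  OS << Sym;
  if (Offset) {
    if (Offset > 0)
      OS << '+';
    OS << Offset;
  }
}
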
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
new file mode 100644
index 000000000000..772c515b5c9e
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -0,0 +1,44 @@
+//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the XCoreInstPrinter class,
+/// which is used to print an XCore MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTPRINTER_H
+#define XCOREINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+class XCoreInstPrinter : public MCInstPrinter {
+public:
+ XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+private:
+ void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
+ void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 53b4a9e3f5f7..59e64ad0855c 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -16,13 +16,14 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCTargetDesc TargetInfo
+subdirectories = Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = XCore
parent = Target
has_asmprinter = 1
+has_disassembler = 1
[component_1]
type = Library
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index a80c939b4372..8213f9e42883 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = XCoreDesc
parent = XCore
-required_libraries = MC XCoreInfo
+required_libraries = MC XCoreAsmPrinter XCoreInfo
add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index bbfdd4356f2a..b5b072dcbda6 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "XCoreMCTargetDesc.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -69,6 +70,15 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
+static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new XCoreInstPrinter(MAI, MII, MRI);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC asm info.
@@ -87,4 +97,8 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
createXCoreMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
+ createXCoreMCInstPrinter);
}
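
Note: RegisterMCInstPrinter just records the factory against TheXCoreTarget; tools later recover a printer through the Target rather than naming XCoreInstPrinter directly. A hedged sketch of the lookup side (the createMCInstPrinter parameter list is assumed from this era's TargetRegistry API):

// Hypothetical helper showing how a client would obtain the registered
// printer; XCore defines only syntax variant 0.
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

static MCInstPrinter *getXCorePrinter(const Target &T, const MCAsmInfo &MAI,
                                      const MCInstrInfo &MII,
                                      const MCRegisterInfo &MRI,
                                      const MCSubtargetInfo &STI) {
  // Returns the printer created by createXCoreMCInstPrinter above.
  return T.createMCInstPrinter(/*SyntaxVariant=*/0, MAI, MII, MRI, STI);
}
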
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index b823c4ed37e9..92ddc8860876 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -14,10 +14,10 @@ TARGET = XCore
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \
XCoreGenAsmWriter.inc \
- XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
- XCoreGenSubtargetInfo.inc
+ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
+ XCoreGenDisassemblerTables.inc XCoreGenSubtargetInfo.inc
-DIRS = TargetInfo MCTargetDesc
+DIRS = Disassembler InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 9a0971d1e45f..00e34e04fbe5 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "XCore.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 04a1dd5e95be..e9a6d88fd68e 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -41,7 +41,13 @@ def : Proc<"xs1b-generic", []>;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def XCoreAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
def XCore : Target {
// Pull in Instruction Info:
let InstructionSet = XCoreInstrInfo;
+ let AssemblyWriters = [XCoreAsmWriter];
}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index caae56227214..0d146ba4d98d 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -14,31 +14,34 @@
#define DEBUG_TYPE "asm-printer"
#include "XCore.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMCInstLower.h"
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cctype>
using namespace llvm;
@@ -52,16 +55,17 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
+ XCoreMCInstLower MCInstLowering;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
public:
explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
+ MCInstLowering(*this) {}
virtual const char *getPassName() const {
return "XCore Assembly Printer";
}
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive = ".jmptable");
void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
@@ -75,18 +79,14 @@ namespace {
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
virtual void EmitGlobalVariable(const GlobalVariable *GV);
- void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
- static const char *getRegisterName(unsigned RegNo);
-
void EmitFunctionEntryLabel();
void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyStart();
void EmitFunctionBodyEnd();
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
-#include "XCoreGenAsmWriter.inc"
-
void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
assert(((GV->hasExternalLinkage() ||
GV->hasWeakLinkage()) ||
@@ -171,12 +171,16 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer.EmitZeros(4 - Size, 0);
+ OutStreamer.EmitZeros(4 - Size);
// Mark the end of the global
OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
}
+void XCoreAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+}
+
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
/// the last basic block in the function.
void XCoreAsmPrinter::EmitFunctionBodyEnd() {
@@ -192,17 +196,6 @@ void XCoreAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
-void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << "+";
- printOperand(MI, opNum+1, O);
-}
-
void XCoreAsmPrinter::
printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive) {
@@ -225,7 +218,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << XCoreInstPrinter::getRegisterName(MO.getReg());
break;
case MachineOperand::MO_Immediate:
O << MO.getImm();
@@ -270,7 +263,7 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
}
-printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, O);
return false;
}
@@ -317,15 +310,30 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case XCore::ADD_2rus:
if (MI->getOperand(2).getImm() == 0) {
- O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
- << getRegisterName(MI->getOperand(1).getReg());
+ O << "\tmov "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
OutStreamer.EmitRawText(O.str());
return;
}
break;
+ case XCore::BR_JT:
+ case XCore::BR_JT32:
+ O << "\tbru "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()) << '\n';
+ if (MI->getOpcode() == XCore::BR_JT)
+ printInlineJT(MI, 0, O);
+ else
+ printInlineJT32(MI, 0, O);
+ O << '\n';
+ OutStreamer.EmitRawText(O.str());
+ return;
}
- printInstruction(MI, O);
- OutStreamer.EmitRawText(O.str());
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ OutStreamer.EmitInstruction(TmpInst);
}
// Force static initialization.
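
Note: with this change only the pseudo-instructions keep the raw-text path; everything else is lowered to an MCInst and emitted through the streamer, which in turn uses the registered XCoreInstPrinter. XCoreMCInstLower.cpp itself is outside this hunk; as a sketch under that assumption, its Lower typically maps operands one-for-one:

// Sketch of the lowering step (the real XCoreMCInstLower.cpp is not
// shown in this diff; LowerOperand is an assumed per-operand helper).
void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MCOperand MCOp = LowerOperand(MI->getOperand(i)); // regs, imms, symbols
    if (MCOp.isValid())
      OutMI.addOperand(MCOp);
  }
}
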
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e18d97384d3d..beeb07f831c6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -16,16 +16,16 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -98,13 +98,10 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool FP = hasFP(MF);
- const AttrListPtr &PAL = MF.getFunction()->getAttributes();
+ const AttributeSet &PAL = MF.getFunction()->getAttributes();
- for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I)
- if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) {
- loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
- break;
- }
+ if (PAL.hasAttrSomewhere(Attribute::Nest))
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
// Work out frame sizes.
int FrameSize = MFI->getStackSize();
@@ -264,7 +261,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
MBB.erase(MBBI);
} else {
- int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
}
}
@@ -335,6 +332,58 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// This function eliminates the ADJCALLSTACKDOWN and ADJCALLSTACKUP pseudo
+// instructions.
+void XCoreFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount + Align - 1) / Align * Align;
+
+ assert(Amount % 4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIXME: could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable("Stack adjustment size out of range");
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
void
XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -360,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (RegInfo->requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
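
Note: the arithmetic in eliminateCallFramePseudoInstr converts a byte adjustment into the word immediate that EXTSP/LDAWSP expect: round up to the stack alignment, then divide by the 4-byte word size. As a standalone check:

// Sketch of the byte->word immediate computation (assumes the u6/u16
// range check is done by the caller, as in the code above).
#include <cassert>
#include <cstdint>

static uint64_t stackOpImmediate(uint64_t Bytes, unsigned Align) {
  uint64_t Rounded = (Bytes + Align - 1) / Align * Align; // round up
  assert(Rounded % 4 == 0 && "XCore stack ops operate on words");
  return Rounded / 4;
}
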
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index db1bbb60d968..ebad62f2fa53 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -39,6 +39,10 @@ namespace llvm {
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 7564fbad7d45..fbf86c523054 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -13,23 +13,23 @@
#include "XCore.h"
#include "XCoreTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
@@ -211,15 +211,10 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
Ops, 4);
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- case Intrinsic::xcore_crc8:
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- break;
+ case XCoreISD::CRC8: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
}
case ISD::BRIND:
if (SDNode *ResNode = SelectBRIND(N))
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9e7816e21f80..a5d2be88db7d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -14,17 +14,11 @@
#define DEBUG_TYPE "xcore-lower"
#include "XCoreISelLowering.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "XCoreTargetObjectFile.h"
-#include "XCoreTargetMachine.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCoreSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +27,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -54,6 +54,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::LMUL : return "XCoreISD::LMUL";
case XCoreISD::MACCU : return "XCoreISD::MACCU";
case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::CRC8 : return "XCoreISD::CRC8";
case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
default : return NULL;
@@ -83,7 +84,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// XCore does not have the NodeTypes below.
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Expand);
setOperationAction(ISD::ADDE, MVT::i32, Expand);
@@ -152,9 +153,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
- maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
- maxStoresPerMemmove = maxStoresPerMemmoveOptSize
- = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize
+ = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::STORE);
@@ -167,24 +171,25 @@ SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
- case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
// FIXME: Remove these when LegalizeDAGTypes lands.
case ISD::ADD:
- case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
- case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -225,20 +230,16 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
{
// FIXME there is no actual debug info here
DebugLoc dl = GA.getDebugLoc();
- if (isa<Function>(GV)) {
- return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ const GlobalValue *UnderlyingGV = GV;
+ // If GV is an alias then use the aliasee to determine the wrapper type
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ UnderlyingGV = GA->resolveAliasedGlobal();
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
+ if (GVar->isConstant())
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee to determine constness
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
- }
- bool isConst = GVar && GVar->isConstant();
- if (isConst) {
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- }
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
}
SDValue XCoreTargetLowering::
@@ -740,13 +741,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
XCoreISD::LSUB;
SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSL, RHSL, Zero);
- SDValue Lo(Carry.getNode(), 1);
+ SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Carry(Lo.getNode(), 1);
- SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSH, RHSH, Carry);
- SDValue Hi(Ignored.getNode(), 1);
+ SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Ignored(Hi.getNode(), 1);
// Merge the pieces
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
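
Note: the reordered LADD/LSUB expansion makes result 0 the 32-bit sum or difference and result 1 the carry or borrow, so the low half's carry-out chains straight into the high half. The same dataflow modeled in plain C++:

// Plain model of the 64-bit ADD expansion above (ladd mimics the
// two-result XCoreISD::LADD node: {sum, carry-out}).
#include <cstdint>

struct LAdd { uint32_t Sum; uint32_t Carry; };

static LAdd ladd(uint32_t A, uint32_t B, uint32_t CarryIn) {
  uint64_t Wide = (uint64_t)A + B + CarryIn;
  LAdd R = { (uint32_t)Wide, (uint32_t)(Wide >> 32) };
  return R;
}

static uint64_t add64(uint32_t LHSL, uint32_t LHSH,
                      uint32_t RHSL, uint32_t RHSH) {
  LAdd Lo = ladd(LHSL, RHSL, 0);
  LAdd Hi = ladd(LHSH, RHSH, Lo.Carry); // carry chains into the high word
  return ((uint64_t)Hi.Sum << 32) | Lo.Sum;
}
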
@@ -862,6 +863,23 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
+SDValue XCoreTargetLowering::
+LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ EVT VT = Op.getValueType();
+ SDValue Data =
+ DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT),
+                  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ SDValue Crc(Data.getNode(), 1);
+ SDValue Results[] = { Crc, Data };
+ return DAG.getMergeValues(Results, 2, DL);
+ }
+ return SDValue();
+}
+
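LowerINTRINSIC_WO_CHAIN above shows a common pattern: the target node (XCoreISD::CRC8) produces its two results in one order while the intrinsic is defined to return them in the other, so the lowering builds the node once and re-exports its results swapped via getMergeValues. A small standalone C++ analogy of that result swizzling (the struct and function names are illustrative, not LLVM API):

    #include <cstdio>
    #include <utility>

    // The "node" computes {data, crc}; the "intrinsic" promises {crc, data}.
    struct NodeResults { int Data; int Crc; };

    static NodeResults crc8Node(int Crc, int Data, int Poly) {
      // Placeholder computation; only the result plumbing matters here.
      return { Data ^ Poly, Crc ^ Data };
    }

    static std::pair<int, int> lowerCrc8(int Crc, int Data, int Poly) {
      NodeResults N = crc8Node(Crc, Data, Poly);
      return { N.Crc, N.Data };   // the getMergeValues({Crc, Data}) step
    }

    int main() {
      std::pair<int, int> R = lowerCrc8(1, 2, 3);
      std::printf("%d %d\n", R.first, R.second);
      return 0;
    }
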
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1231,15 +1249,11 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Return on XCore is always a "retsp 0"
+ RetOps.push_back(DAG.getConstant(0, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1252,15 +1266,17 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // Return on XCore is always a "retsp 0"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32), Flag);
- else // Return Void
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32));
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
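
The rewritten LowerReturn follows the now-standard LLVM shape: collect the return node's operands in one vector (chain first, then the fixed "retsp 0" immediate, then one register operand per copied value, then the optional glue) and emit a single variadic node; the matching SDNPVariadic flag is added to the XCoreRetsp SDNode in XCoreInstrInfo.td further down. A plain C++ sketch of the operand-list assembly (the strings stand in for SDValues):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // RetOps starts as {Chain} and gains the fixed "retsp 0" immediate.
      std::vector<std::string> RetOps = { "Chain" };
      RetOps.push_back("Imm:0");

      // One register operand per return value, mirroring the CopyToReg loop.
      const char *RetRegs[] = { "R0", "R1" };
      for (const char *Reg : RetRegs)
        RetOps.push_back(std::string("Reg:") + Reg);

      bool HaveGlue = true;            // i.e. Flag.getNode() != nullptr
      if (HaveGlue)
        RetOps.push_back("Glue");

      // Equivalent of getNode(XCoreISD::RETSP, ..., &RetOps[0], RetOps.size())
      for (const std::string &Op : RetOps)
        std::printf("%s ", Op.c_str());
      std::printf("\n");
      return 0;
    }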
//===----------------------------------------------------------------------===//
@@ -1357,13 +1373,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, VT));
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1371,7 +1387,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
}
@@ -1395,14 +1411,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1410,7 +1426,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
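
Both folds above rest on the same known-bits guard: the carry-in operand may be anything at bit 0, but every higher bit must be provably zero before ladd/lsub with a zero second operand can be replaced by a plain add/sub. A standalone C++ model of that guard, using a simplified known-bits pair instead of APInt:

    #include <cstdint>
    #include <cstdio>

    // Simplified known-bits: a bit is known zero if set in Zero.
    struct KnownBits32 { uint32_t Zero; uint32_t One; };

    // Mirrors the fold guard: all bits except bit 0 must be known zero,
    // i.e. (KnownZero & HighMask) == HighMask for the 31 high bits.
    static bool onlyLowBitMaybeSet(KnownBits32 K) {
      const uint32_t HighMask = ~1u;   // APInt::getHighBitsSet(32, 31)
      return (K.Zero & HighMask) == HighMask;
    }

    int main() {
      KnownBits32 BoolLike = { ~1u, 0 };   // e.g. a setcc result: 0 or 1
      KnownBits32 Unknown  = { 0, 0 };     // nothing known
      std::printf("%d %d\n", onlyLowBitMaybeSet(BoolLike),
                  onlyLowBitMaybeSet(Unknown));   // 1 0
      return 0;
    }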
@@ -1436,11 +1452,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// If the high result is unused fold to add(a, b)
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
- SDValue Ops [] = { Lo, Lo };
+ SDValue Ops[] = { Lo, Lo };
return DAG.getMergeValues(Ops, 2, dl);
}
// Otherwise fold to ladd(a, b, 0)
- return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Result =
+ DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Carry(Result.getNode(), 1);
+ SDValue Ops[] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
}
}
break;
@@ -1534,7 +1554,7 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case XCoreISD::LADD:
case XCoreISD::LSUB:
- if (Op.getResNo() == 0) {
+ if (Op.getResNo() == 1) {
// Top bits of carry / borrow are clear.
KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
KnownZero.getBitWidth() - 1);
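
This res-number fix is what keeps the earlier combines sound: with the carry/borrow now in result 1, the target reports its top 31 bits as known zero, so a carry produced by one LADD satisfies the "only the low bit may be set" guard when it feeds the next. A tiny standalone illustration of the mask being reported (plain integers in place of APInt):

    #include <cstdint>
    #include <cstdio>

    // Equivalent of APInt::getHighBitsSet(BitWidth, BitWidth - 1) for i32:
    // all bits set except the lowest one.
    static uint32_t highBitsSet(unsigned BitWidth, unsigned Count) {
      return Count == 0 ? 0u : (~0u << (BitWidth - Count));
    }

    int main() {
      // What computeMaskedBitsForTargetNode reports for LADD/LSUB result 1:
      uint32_t KnownZero = highBitsSet(32, 31);
      std::printf("0x%08X\n", KnownZero);   // 0xFFFFFFFE
      return 0;
    }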
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 2874f00e4763..8d258f5054c1 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -63,6 +63,9 @@ namespace llvm {
// Corresponds to MACCS instruction
MACCS,
+ // Corresponds to CRC8 instruction
+ CRC8,
+
// Jumptable branch.
BR_JT,
@@ -81,7 +84,7 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -147,6 +150,7 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 1963a70fb30d..379cc39aa617 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -10,7 +10,7 @@
//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//
-class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstXCore<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
field bits<32> Inst;
@@ -19,102 +19,259 @@ class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
dag InOperandList = ins;
let AsmString = asmstr;
let Pattern = pattern;
+ let Size = sz;
+ field bits<32> SoftFail = 0;
}
// XCore pseudo instructions format
class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern>;
+ : InstXCore<0, outs, ins, asmstr, pattern> {
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Instruction formats
//===----------------------------------------------------------------------===//
-class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode3RInstruction";
+}
+
+// 3R with first operand as an immediate. Used for TSETR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode3RImmInstruction";
+}
+
+class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL3RInstruction";
+}
+
+// L3R with first operand as both a source and a destination.
+class _FL3RSrcDst<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _FL3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL3RSrcDstInstruction";
+}
+
+class _F2RUS<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode2RUSInstruction";
+}
+
+// 2RUS with bitp operand
+class _F2RUSBitp<bits<5> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _F2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RUSBitpInstruction";
+}
+
+class _FL2RUS<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL2RUSInstruction";
+}
+
+// L2RUS with bitp operand
+class _FL2RUSBitp<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL2RUSBitpInstruction";
+}
+
+class _FRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<6> b;
+
+ let Inst{15-10} = opc;
+ let Inst{9-6} = a;
+ let Inst{5-0} = b;
}
-class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<16> b;
+
+ let Inst{31-26} = opc;
+ let Inst{25-22} = a;
+ let Inst{21-16} = b{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = b{15-6};
}
-class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<6> a;
+
+ let Inst{15-6} = opc;
+ let Inst{5-0} = a;
}
-class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<16> a;
+
+ let Inst{31-22} = opc;
+ let Inst{21-16} = a{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{15-6};
}
-class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<10> a;
+
+ let Inst{15-10} = opc;
+ let Inst{9-0} = a;
}
-class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<20> a;
+
+ let Inst{31-26} = opc;
+ let Inst{25-16} = a{9-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{19-10};
}
-class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "Decode2RInstruction";
}
-class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// 2R with first operand as an immediate. Used for TSETMR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RImmInstruction";
}
-class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// 2R with first operand as both a source and a destination.
+class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RSrcDstInstruction";
}
-class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// Same as 2R with last two operands swapped
+class _FR2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeR2RInstruction";
}
-class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FRUS<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeRUSInstruction";
}
-class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with bitp operand
+class _FRUSBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSBitpInstruction";
}
-class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with first operand as both a source and a destination and a bitp second
+// operand
+class _FRUSSrcDstBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSSrcDstBitpInstruction";
}
-class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FL2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{9-5};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{4-1};
+
+ let Inst{15-11} = 0b11111;
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeL2RInstruction";
}
-class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// Same as L2R with last two operands swapped
+class _FLR2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _FL2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeLR2RInstruction";
}
-class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F1R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+
+ let Inst{15-11} = opc{5-1};
+ let Inst{10-5} = 0b111111;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = a;
}
-class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{9-5};
+ let Inst{10-5} = 0b111111;
+ let Inst{4-0} = opc{4-0};
}
-class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FL4R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> d;
+
+ let Inst{31-27} = opc{5-1};
+ let Inst{26-21} = 0b111111;
+ let Inst{20} = opc{0};
+ let Inst{19-16} = d;
+ let Inst{15-11} = 0b11111;
+}
+
+// L4R with 4th operand as both a source and a destination.
+class _FL4RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstInstruction";
+}
+
+// L4R with 1st and 4th operands each as both a source and a destination.
+class _FL4RSrcDstSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction";
+}
+
+class _FL5R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{5-1};
+ let Inst{20} = opc{0};
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL5RInstruction";
+}
+
+class _FL6R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc;
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL6RInstruction";
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 0a3008d7ab33..e457e0dbf027 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "XCoreInstrInfo.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 3e7666bdb936..03653cb2b3de 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -32,8 +32,8 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>;
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -182,6 +182,7 @@ def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
// Address operands
def MEMii : Operand<i32> {
let PrintMethod = "printMemOperand";
+ let DecoderMethod = "DecodeMEMiiOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
@@ -200,154 +201,117 @@ def InlineJT32 : Operand<i32> {
// Three operand short
-multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass F3R_2RUS<bits<5> opc1, bits<5> opc2, string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
-multiclass F3R_2RUS_np<string OpcStr> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+multiclass F3R_2RUS_np<bits<5> opc1, bits<5> opc2, string OpcStr> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
}
-multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass F3R_2RBITP<bits<5> opc1, bits<5> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class F3R<string OpcStr, SDNode OpNode> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class F3R<bits<5> opc, string OpcStr, SDNode OpNode> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
-class F3R_np<string OpcStr> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+class F3R_np<bits<5> opc, string OpcStr> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
// Three operand long
/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
-multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass FL3R_L2RUS<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
/// FL3R_L2RBITP multiclass - Define a normal FL3R/FL2RUSBitp pattern in one
/// shot.
-multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass FL3R_L2RBITP<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class FL3R<bits<9> opc, string OpcStr, SDNode OpNode> :
+ _FL3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
// Register - U6
// Operand register - U6
-multiclass FRU6_LRU6_branch<string OpcStr> {
- def _ru6: _FRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
- def _lru6: _FLRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
+multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
}
-multiclass FRU6_LRU6_cp<string OpcStr> {
- def _ru6: _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
- def _lru6: _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
+multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
}
-// U6
-multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU16:$b)]>;
+multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
+ def _lru6: _FLRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
}
-multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU16:$b)]>;
+
+// U6
+multiclass FU6_LU6<bits<10> opc, string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU16:$a)]>;
}
-multiclass FU6_LU6_np<string OpcStr> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_int<bits<10> opc, string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU16:$a)]>;
}
-// U10
-multiclass FU10_LU10_np<string OpcStr> {
- def _u10: _FU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu10: _FLU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_np<bits<10> opc, string OpcStr> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
}
// Two operand short
-class F2R_np<string OpcStr> : _F2R<
- (outs GRRegs:$dst), (ins GRRegs:$b),
- !strconcat(OpcStr, " $dst, $b"),
- []>;
+class F2R_np<bits<6> opc, string OpcStr> :
+ _F2R<opc, (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"), []>;
// Two operand long
@@ -357,23 +321,23 @@ class F2R_np<string OpcStr> : _F2R<
let Defs = [SP], Uses = [SP] in {
def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
- "${:comment} ADJCALLSTACKDOWN $amt",
+ "# ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "${:comment} ADJCALLSTACKUP $amt1",
+ "# ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDWFI $dst, $addr",
+ "# LDWFI $dst, $addr",
[(set GRRegs:$dst, (load ADDRspii:$addr))]>;
def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDAWFI $dst, $addr",
+ "# LDAWFI $dst, $addr",
[(set GRRegs:$dst, ADDRspii:$addr)]>;
def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
- "${:comment} STWFI $src, $addr",
+ "# STWFI $src, $addr",
[(store GRRegs:$src, ADDRspii:$addr)]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -381,7 +345,7 @@ def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
let usesCustomInserter = 1 in {
def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
(ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
- "${:comment} SELECT_CC PSEUDO!",
+ "# SELECT_CC PSEUDO!",
[(set GRRegs:$dst,
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
@@ -391,572 +355,564 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
// Three operand short
-defm ADD : F3R_2RUS<"add", add>;
-defm SUB : F3R_2RUS<"sub", sub>;
+defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>;
+defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>;
let neverHasSideEffects = 1 in {
-defm EQ : F3R_2RUS_np<"eq">;
-def LSS_3r : F3R_np<"lss">;
-def LSU_3r : F3R_np<"lsu">;
+defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">;
+def LSS_3r : F3R_np<0b11000, "lss">;
+def LSU_3r : F3R_np<0b11001, "lsu">;
}
-def AND_3r : F3R<"and", and>;
-def OR_3r : F3R<"or", or>;
+def AND_3r : F3R<0b00111, "and", and>;
+def OR_3r : F3R<0b01000, "or", or>;
let mayLoad=1 in {
-def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld16s $dst, $addr[$offset]",
- []>;
+def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]", []>;
-def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld8u $dst, $addr[$offset]",
- []>;
+def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]", []>;
}
let mayStore=1 in {
-def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_l3r : _FL3R<0b000001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]", []>;
-def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_2rus : _F2RUS<0b0000, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]", []>;
}
-defm SHL : F3R_2RBITP<"shl", shl>;
-defm SHR : F3R_2RBITP<"shr", srl>;
-// TODO tsetr
+defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>;
+defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>;
+
+// The first operand is treated as an immediate since it refers to a register
+// number in another thread.
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c),
+ "set t[$c]:r$a, $b", []>;
// Three operand long
-def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[$offset]",
- [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (ldawf GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[$offset]",
- []>;
+def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]", []>;
-def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (ldawb GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[-$offset]",
- []>;
-
-def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[$offset]",
- [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
-
-def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
-
-def MUL_l3r : FL3R<"mul", mul>;
+def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]", []>;
+
+def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<0b001111100, "mul", mul>;
// Instructions which may trap are marked as side-effecting.
let hasSideEffects = 1 in {
-def DIVS_l3r : FL3R<"divs", sdiv>;
-def DIVU_l3r : FL3R<"divu", udiv>;
-def REMS_l3r : FL3R<"rems", srem>;
-def REMU_l3r : FL3R<"remu", urem>;
+def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>;
+def DIVU_l3r : FL3R<0b010011100, "divu", udiv>;
+def REMS_l3r : FL3R<0b110001100, "rems", srem>;
+def REMU_l3r : FL3R<0b110011100, "remu", urem>;
}
-def XOR_l3r : FL3R<"xor", xor>;
-defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+def XOR_l3r : FL3R<0b000011100, "xor", xor>;
+defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>;
let Constraints = "$src1 = $dst" in
-def CRC_l3r : _FL3R<(outs GRRegs:$dst),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc32 $dst, $src2, $src3",
- [(set GRRegs:$dst,
- (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
- GRRegs:$src3))]>;
+def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
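
CRC_l3r ties $src1 to $dst because the instruction updates the running checksum in place. As a rough software model of a word-wide, bit-serial CRC step of the kind int_xcore_crc32 performs (this is a generic reflected-LFSR sketch; the instruction's exact bit ordering should be checked against the XS1 architecture manual):

    #include <cstdint>
    #include <cstdio>

    // Generic bit-serial CRC over one 32-bit data word with polynomial Poly.
    // Rough model only, not a verified bit-exact rendering of crc32.
    static uint32_t crc32Step(uint32_t Checksum, uint32_t Data,
                              uint32_t Poly) {
      for (int I = 0; I < 32; ++I) {
        uint32_t Bit = (Checksum ^ Data) & 1;
        Checksum >>= 1;
        Data >>= 1;
        if (Bit)
          Checksum ^= Poly;
      }
      return Checksum;   // written back to $dst (== $src1)
    }

    int main() {
      std::printf("0x%08X\n", crc32Step(0xFFFFFFFFu, 'X', 0xEDB88320u));
      return 0;
    }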
-// TODO inpw, outpw
let mayStore=1 in {
-def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st16 $val, $addr[$offset]",
- []>;
+def ST16_l3r : _FL3R<0b100001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]", []>;
-def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st8 $val, $addr[$offset]",
- []>;
+def ST8_l3r : _FL3R<0b100011100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]", []>;
}
-// Four operand long
-let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
-def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccu $dst1, $dst2, $src3, $src4",
- []>;
+def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a),
+ (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c",
+ []>;
-def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccs $dst1, $dst2, $src3, $src4",
- []>;
+def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs),
+ (ins GRRegs:$a, GRRegs:$b, i32imm:$c),
+ "outpw res[$b], $a, $c", []>;
+
+// Four operand long
+let Constraints = "$e = $a,$f = $b" in {
+def MACCU_l4r : _FL4RSrcDstSrcDst<
+ 0b000001, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>;
+
+def MACCS_l4r : _FL4RSrcDstSrcDst<
+ 0b000010, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>;
}
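
The tied-operand constraints "$e = $a,$f = $b" express that the {hi,lo} accumulator pair of maccu/maccs is both read and written by the instruction. A software model of the unsigned form (my reading of maccu as a 64-bit multiply-accumulate over two 32-bit halves):

    #include <cstdint>
    #include <cstdio>

    // Model of maccu: the {Hi,Lo} pair is a 64-bit accumulator that is
    // read, added to C*D, and written back; hence the tied constraints.
    static void maccu(uint32_t &Hi, uint32_t &Lo, uint32_t C, uint32_t D) {
      uint64_t Acc = ((uint64_t)Hi << 32) | Lo;
      Acc += (uint64_t)C * D;
      Hi = (uint32_t)(Acc >> 32);
      Lo = (uint32_t)Acc;
    }

    int main() {
      uint32_t Hi = 0, Lo = 0xFFFFFFFFu;
      maccu(Hi, Lo, 2, 3);                    // overflows Lo into Hi
      std::printf("%u 0x%08X\n", Hi, Lo);     // 1 0x00000005
      return 0;
    }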
-let Constraints = "$src1 = $dst1" in
-def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc8 $dst1, $dst2, $src2, $src3",
- []>;
+let Constraints = "$e = $b" in
+def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$c, GRRegs:$d),
+ "crc8 $b, $a, $c, $d", []>;
// Five operand long
-def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ladd $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst2, $dst1, $src1, $src2, $src3",
+ []>;
-def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "lsub $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst2, $dst1, $src1, $src2, $src3", []>;
-def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ldiv $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldivu $dst1, $dst2, $src3, $src1, $src2", []>;
// Six operand long
-def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
- []>;
+def LMUL_l6r : _FL6R<
+ 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>;
// Register - U6
//let Uses = [DP] in ...
let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- []>;
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]", []>;
let isReMaterializable = 1 in
-def LDAWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- [(set GRRegs:$dst, ADDRdpii:$a)]>;
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]",
+ [(set RRegs:$a, ADDRdpii:$b)]>;
let mayLoad=1 in
-def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- []>;
-
-def LDWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]", []>;
+
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]",
+ [(set RRegs:$a, (load ADDRdpii:$b))]>;
let mayStore=1 in
-def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- []>;
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]", []>;
-def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- [(store GRRegs:$val, ADDRdpii:$addr)]>;
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]",
+ [(store RRegs:$a, ADDRdpii:$b)]>;
//let Uses = [CP] in ..
let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
-defm LDWCP : FRU6_LRU6_cp<"ldw">;
+defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">;
let Uses = [SP] in {
let mayStore=1 in {
-def STWSP_ru6 : _FRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
-
-def STWSP_lru6 : _FLRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+def STWSP_ru6 : _FRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU6:$b)]>;
+
+def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU16:$b)]>;
}
let mayLoad=1 in {
-def LDWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
-def LDWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
}
let neverHasSideEffects = 1 in {
-def LDAWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_ru6 : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
-def LDAWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
-
-def LDAWSP_ru6_RRegs : _FRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
-
-def LDAWSP_lru6_RRegs : _FLRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_lru6 : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
}
}
let isReMaterializable = 1 in {
-def LDC_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU6:$b)]>;
-
-def LDC_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU16:$b)]>;
+def LDC_ru6 : _FRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU16:$b)]>;
}
-def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU6:$b)]>;
-def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU16:$b)]>;
// Operand register - U6
let isBranch = 1, isTerminator = 1 in {
-defm BRFT: FRU6_LRU6_branch<"bt">;
-defm BRBT: FRU6_LRU6_branch<"bt">;
-defm BRFF: FRU6_LRU6_branch<"bf">;
-defm BRBF: FRU6_LRU6_branch<"bf">;
+defm BRFT: FRU6_LRU6_branch<0b011100, "bt">;
+defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">;
+defm BRFF: FRU6_LRU6_branch<0b011110, "bf">;
+defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">;
}
// U6
let Defs = [SP], Uses = [SP] in {
let neverHasSideEffects = 1 in
-defm EXTSP : FU6_LU6_np<"extsp">;
+defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">;
+
let mayStore = 1 in
-defm ENTSP : FU6_LU6_np<"entsp">;
+defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">;
let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
-defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>;
}
}
-// TODO extdp, kentsp, krestsp, blat
-// getsr, kalli
+let neverHasSideEffects = 1 in
+defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">;
+
+let Uses = [R11], isCall=1 in
+defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def BRBU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRBU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRFU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
-def BRFU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
}
//let Uses = [CP] in ...
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
[]>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAWCP_lu6: _FLRU6<
- (outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
- [(set R11, ADDRcpii:$a)]>;
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+let Defs = [R11] in
+defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
-defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>;
-defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>;
// setsr may cause a branch if it is used to enable events. clrsr may
// branch if it is executed while events are enabled.
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
-defm SETSR_branch : FU6_LU6_np<"setsr">;
-defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ isCodeGenOnly = 1 in {
+defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">;
+defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">;
}
+defm KCALL : FU6_LU6_np<0b0111001111, "kcall">;
+
+let Uses = [SP], Defs = [SP], mayStore = 1 in
+defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">;
+
+let Uses = [SP], Defs = [SP], mayLoad = 1 in
+defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
+
// U10
-// TODO ldwcpl, blacp
let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
-def LDAP_u10 : _FU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- []>;
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10 : _FLU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10_ba : _FLU10<(outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tblockaddress:$a))]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BL_u10 : _FU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU10:$target)]>;
-
-def BL_lu10 : _FLU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU20:$target)]>;
+def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU10:$a)]>;
+
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU20:$a)]>;
+}
+
+let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1 in {
+def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>;
+
+def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]",
+ []>;
}
// Two operand short
-// TODO eet, eef, tsetmr
-def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "not $dst, $b",
- [(set GRRegs:$dst, (not GRRegs:$b))]>;
+def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>;
-def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "neg $dst, $b",
- [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+def NEG : _F2R<0b100100, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
let Constraints = "$src1 = $dst" in {
-def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "andnot $dst, $src2",
- [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+def SEXT_rus :
+ _FRUSSrcDstBitp<0b001101, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r :
+ _F2RSrcDst<0b001100, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ZEXT_rus :
+ _FRUSSrcDstBitp<0b010001, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r :
+ _F2RSrcDst<0b010000, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ANDNOT_2r :
+ _F2RSrcDst<0b001010, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
}
let isReMaterializable = 1, neverHasSideEffects = 1 in
-def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
- "mkmsk $dst, $size",
- []>;
+def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size", []>;
-def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
- "mkmsk $dst, $size",
- [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
+def MKMSK_2r : _F2R<0b101000, (outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
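
The MKMSK_2r pattern (add (shl 1, size), -1) is the usual low-bit mask idiom, worth spelling out since the instruction name is opaque:

    #include <cstdint>
    #include <cstdio>

    // mkmsk(n) == (1 << n) - 1, a mask of n low one-bits. Valid here for
    // n in 0..31; the hardware's bitp-immediate form also accepts 32
    // (all ones), where the C shift below would be undefined.
    static uint32_t mkmsk(uint32_t Size) {
      return (1u << Size) - 1u;
    }

    int main() {
      std::printf("0x%08X 0x%08X\n", mkmsk(8), mkmsk(1)); // 0x000000FF 0x00000001
      return 0;
    }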
-def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
- "getr $dst, $type",
- [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+def GETR_rus : _FRUS<0b100000, (outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
-def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "getts $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+def GETTS_2r : _F2R<0b001110, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
-def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setpt res[$r], $val",
- [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+def SETPT_2r : _FR2R<0b001111, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+def OUTCT_2r : _F2R<0b010010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+def OUTCT_rus : _FRUS<0b010011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
-def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outt res[$r], $val",
- [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+def OUTT_2r : _FR2R<0b000011, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
-def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "out res[$r], $val",
- [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+def OUT_2r : _FR2R<0b101010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
let Constraints = "$src = $dst" in
-def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "outshr res[$r], $src",
- [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
- GRRegs:$src))]>;
+def OUTSHR_2r :
+ _F2RSrcDst<0b101011, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
-def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "inct $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+def INCT_2r : _F2R<0b100001, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
-def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "int $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+def INT_2r : _F2R<0b100011, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
-def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def IN_2r : _F2R<0b101100, (outs GRRegs:$dst), (ins GRRegs:$r),
"in $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
let Constraints = "$src = $dst" in
-def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "inshr $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
- GRRegs:$src))]>;
+def INSHR_2r :
+ _F2RSrcDst<0b101101, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
-def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+def CHKCT_2r : _F2R<0b110010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
-def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+def CHKCT_rus : _FRUSBitp<0b110011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
-def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTCT_2r : _F2R<0b101111, (outs GRRegs:$dst), (ins GRRegs:$src),
"testct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
-def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTWCT_2r : _F2R<0b110001, (outs GRRegs:$dst), (ins GRRegs:$src),
"testwct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
-def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setd res[$r], $val",
- [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
-def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r),
"getst $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
-def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITSP_2r : _F2R<0b000100, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:sp, $src",
[(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
-def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITPC_2r : _F2R<0b000000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:pc, $src",
[(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
-def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITCP_2r : _F2R<0b000110, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:cp, $src",
[(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
-def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITDP_2r : _F2R<0b000010, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:dp, $src",
[(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+def PEEK_2r : _F2R<0b101110, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
+def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eef $a, res[$b]", []>;
+
+def EET_2r : _F2R<0b001001, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eet $a, res[$b]", []>;
+
+def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b),
+ "tsetmr r$a, $b", []>;
+
// Two operand long
-// getd, testlcl
-def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "bitrev $dst, $src",
- [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
-def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "byterev $dst, $src",
- [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+def CLZ_l2r : _FL2R<0b0000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
-def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "clz $dst, $src",
- [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getd $dst, res[$src]", []>;
-def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getn $dst, res[$src]", []>;
-def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "settw res[$r], $val",
- [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
-def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "get $dst, ps[$src]",
- [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+def SETTW_l2r : _FLR2R<0b0010011001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
-def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "set ps[$src1], $src2",
- [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+def GETPS_l2r : _FL2R<0b0001011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
-def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def SETPS_l2r : _FLR2R<0b0001111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<0b0001011000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:lr, $src",
[(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
-def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setclk res[$src1], $src2",
- [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-
-def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setrdy res[$src1], $src2",
- [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setpsc res[$src1], $src2",
- [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setn res[$src1], $src2", []>;
-def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "peek $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
-def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "endin $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testlcl $dst, res[$src]", []>;
// One operand short
-// TODO edu, eeu, waitet, waitef, tstart, clrtp
-// setdp, setcp, setev, kcall
-// dgetreg
-def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
- "msync res[$i]",
- [(int_xcore_msync GRRegs:$i)]>;
-def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
- "mjoin res[$i]",
- [(int_xcore_mjoin GRRegs:$i)]>;
+def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
+ "msync res[$a]",
+ [(int_xcore_msync GRRegs:$a)]>;
+def MJOIN_1r : _F1R<0b000101, (outs), (ins GRRegs:$a),
+ "mjoin res[$a]",
+ [(int_xcore_mjoin GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
-def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bau $addr",
- [(brind GRRegs:$addr)]>;
+def BAU_1r : _F1R<0b001001, (outs), (ins GRRegs:$a),
+ "bau $a",
+ [(brind GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
@@ -968,88 +924,150 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
"bru $i\n$t",
[(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BRU_1r : _F1R<0b001010, (outs), (ins GRRegs:$a), "bru $a", []>;
+
let Defs=[SP], neverHasSideEffects=1 in
-def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
- "set sp, $src",
- []>;
+def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
let hasCtrlDep = 1 in
-def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallt $src",
+def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
+ "ecallt $a",
[]>;
let hasCtrlDep = 1 in
-def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallf $src",
+def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a),
+ "ecallf $a",
[]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BLA_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bla $addr",
- [(XCoreBranchLink GRRegs:$addr)]>;
+def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a),
+ "bla $a",
+ [(XCoreBranchLink GRRegs:$a)]>;
}
-def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
- "syncr res[$r]",
- [(int_xcore_syncr GRRegs:$r)]>;
+def SYNCR_1r : _F1R<0b100001, (outs), (ins GRRegs:$a),
+ "syncr res[$a]",
+ [(int_xcore_syncr GRRegs:$a)]>;
-def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
- "freer res[$r]",
- [(int_xcore_freer GRRegs:$r)]>;
+def FREER_1r : _F1R<0b000100, (outs), (ins GRRegs:$a),
+ "freer res[$a]",
+ [(int_xcore_freer GRRegs:$a)]>;
let Uses=[R11] in {
-def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setv res[$r], r11",
- [(int_xcore_setv GRRegs:$r, R11)]>;
+def SETV_1r : _F1R<0b010001, (outs), (ins GRRegs:$a),
+ "setv res[$a], r11",
+ [(int_xcore_setv GRRegs:$a, R11)]>;
-def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setev res[$r], r11",
- [(int_xcore_setev GRRegs:$r, R11)]>;
+def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a),
+ "setev res[$a], r11",
+ [(int_xcore_setev GRRegs:$a, R11)]>;
}
-def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
- "eeu res[$r]",
- [(int_xcore_eeu GRRegs:$r)]>;
+def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>;
+
+def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>;
+
+def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a),
+ "eeu res[$a]",
+ [(int_xcore_eeu GRRegs:$a)]>;
+
+def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>;
+
+def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>;
+
+def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>;
+
+def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>;
+
+def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>;
// Zero operand short
-// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
-// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
-// dentsp, drestsp
-def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>;
+
+def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>;
+
+let Defs = [SP], Uses = [SP] in
+def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>;
+
+let Defs = [SP] in
+def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>;
+
+def DRET_0R : _F0R<0b0000011110, (outs), (ins), "dret", []>;
+
+def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>;
let Defs = [R11] in {
-def GETID_0R : _F0R<(outs), (ins),
+def GETID_0R : _F0R<0b0001001110, (outs), (ins),
"get r11, id",
[(set R11, (int_xcore_getid))]>;
-def GETED_0R : _F0R<(outs), (ins),
+def GETED_0R : _F0R<0b0000111110, (outs), (ins),
"get r11, ed",
[(set R11, (int_xcore_geted))]>;
-def GETET_0R : _F0R<(outs), (ins),
+def GETET_0R : _F0R<0b0000111111, (outs), (ins),
"get r11, et",
[(set R11, (int_xcore_getet))]>;
+
+def GETKEP_0R : _F0R<0b0001001111, (outs), (ins),
+ "get r11, kep", []>;
+
+def GETKSP_0R : _F0R<0b0001011100, (outs), (ins),
+ "get r11, ksp", []>;
+}
+
+let Defs = [SP] in
+def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>;
+
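+// ldw/stw of spc, ssr, sed and et (here and in the matching stw group below)
+// spill and reload kernel state at fixed words sp[1]..sp[4].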
+let Uses = [SP], mayLoad = 1 in {
+def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>;
+
+def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>;
+
+def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>;
+
+def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>;
}
-def SSYNC_0r : _F0R<(outs), (ins),
+let Uses=[R11] in
+def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>;
+
+def SSYNC_0r : _F0R<0b0000001110, (outs), (ins),
"ssync",
[(int_xcore_ssync)]>;
+let Uses = [SP], mayStore = 1 in {
+def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>;
+
+def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>;
+
+def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>;
+
+def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>;
+}
+
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
-def WAITEU_0R : _F0R<(outs), (ins),
- "waiteu",
- [(brind (int_xcore_waitevent))]>;
+def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
-def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>;
/// sext_inreg
def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
@@ -1091,7 +1109,7 @@ def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
(ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
- (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+ (STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
(STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
def : Pat<(store GRRegs:$val, GRRegs:$addr),
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
new file mode 100644
index 000000000000..f96eda9fcb9f
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower XCore MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+#include "XCoreMCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter)
+  : Printer(asmprinter) {}
+
+void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) {
+ Mang = M;
+ Ctx = C;
+}
+
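+// Lower a symbol-referencing MachineOperand to an MCOperand, folding any
+// constant addend into a (symbol + offset) MCExpr.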
+MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const MCSymbol *Symbol;
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = Printer.GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = Printer.GetCPISymbol(MO.getIndex());
+ Offset += MO.getOffset();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+  // Offset is non-zero at this point; assume it is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
+}
+
+MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
+ unsigned offset) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm() + offset);
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+
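+  // Operands with no MC equivalent (implicit registers, register masks) fall
+  // through to here; the invalid MCOperand tells the caller to drop them.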
+ return MCOperand();
+}
+
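+// Lower MI into OutMI operand by operand, skipping anything LowerOperand
+// declines to translate.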
+void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
new file mode 100644
index 000000000000..28e702bb9884
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -0,0 +1,42 @@
+//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCINSTLOWER_H
+#define XCOREMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class AsmPrinter;
+
+/// \brief This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext *Ctx;
+ Mangler *Mang;
+ AsmPrinter &Printer;
+public:
+ XCoreMCInstLower(class AsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext *C);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+};
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index f869fcf26de3..69d5de3e03ad 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -14,8 +14,8 @@
#ifndef XCOREMACHINEFUNCTIONINFO_H
#define XCOREMACHINEFUNCTIONINFO_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include <vector>
namespace llvm {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index be5855abcd0b..49b563497c0b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -12,25 +12,25 @@
//===----------------------------------------------------------------------===//
#include "XCoreRegisterInfo.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "XCoreGenRegisterInfo.inc"
@@ -101,72 +101,14 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return false;
}
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void XCoreRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
- // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- assert(Amount%4 == 0);
- Amount /= 4;
-
- bool isU6 = isImmU6(Amount);
- if (!isU6 && !isImmU16(Amount)) {
- // FIX could emit multiple instructions in this case.
-#ifndef NDEBUG
- errs() << "eliminateCallFramePseudoInstr size too big: "
- << Amount << "\n";
-#endif
- llvm_unreachable(0);
- }
-
- MachineInstr *New;
- if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
- int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
- int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
- .addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- unsigned i = 0;
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &FrameOp = MI.getOperand(i);
+ MachineOperand &FrameOp = MI.getOperand(FIOperandNum);
int FrameIndex = FrameOp.getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
@@ -190,14 +132,14 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of DBG_VALUE instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// fold constant into offset.
- Offset += MI.getOperand(i + 1).getImm();
- MI.getOperand(i + 1).ChangeToImmediate(0);
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
assert(Offset%4 == 0 && "Misaligned stack offset");
@@ -231,7 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::STWFI:
- BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r))
.addReg(Reg, getKillRegState(isKill))
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index c4dcb6b533c2..1db32489cf8d 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,12 +54,9 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 9edfda1f5007..6694b2882aca 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -45,12 +45,15 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
def GRRegs : RegisterClass<"XCore", [i32], 32,
// Return values and arguments
(add R0, R1, R2, R3,
- // Not preserved across procedure calls
- R11,
// Callee save
- R4, R5, R6, R7, R8, R9, R10)>;
+ R4, R5, R6, R7, R8, R9, R10,
+ // Not preserved across procedure calls
+ R11)>;
// Reserved
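+// RRegs spans every encodable register but is never used for allocation;
+// only cp, dp, sp and lr are truly reserved.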
-def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+def RRegs : RegisterClass<"XCore", [i32], 32,
+ (add R0, R1, R2, R3,
+ R4, R5, R6, R7, R8, R9, R10,
+ R11, CP, DP, SP, LR)> {
let isAllocatable = 0;
}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 8d0f254e087a..5ac4dbc4bc07 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -14,8 +14,8 @@
#ifndef XCORESUBTARGET_H
#define XCORESUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index d5a932c5189d..28c3d12c05fe 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -12,9 +12,9 @@
#include "XCoreTargetMachine.h"
#include "XCore.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c60c6a37f95b..eb9a1aa420eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,12 @@
#define XCORETARGETMACHINE_H
#include "XCoreFrameLowering.h"
-#include "XCoreSubtarget.h"
-#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
#include "XCoreSelectionDAGInfo.h"
+#include "XCoreSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -56,12 +53,6 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 7f4e1c1b4fd7..820389935b38 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -11,8 +11,8 @@
#include "XCoreSubtarget.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;